From a3cc9fcff84c4c8aaecda2420acd89a1418d57e9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:16 +0200 Subject: TTY: ircomm, add tty_port And use close/open_wait from there. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 21 +++++++++++---------- net/irda/ircomm/ircomm_tty_attach.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 6b9d5a0e42f9..8eeaa8b7f4a6 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -278,7 +278,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, */ retval = 0; - add_wait_queue(&self->open_wait, &wait); + add_wait_queue(&self->port.open_wait, &wait); IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n", __FILE__,__LINE__, tty->driver->name, self->open_count ); @@ -336,7 +336,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, } __set_current_state(TASK_RUNNING); - remove_wait_queue(&self->open_wait, &wait); + remove_wait_queue(&self->port.open_wait, &wait); if (extra_count) { /* ++ is not atomic, so this should be protected - Jean II */ @@ -381,6 +381,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) return -ENOMEM; } + tty_port_init(&self->port); self->magic = IRCOMM_TTY_MAGIC; self->flow = FLOW_STOP; @@ -393,8 +394,6 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) /* Init some important stuff */ init_timer(&self->watchdog_timer); - init_waitqueue_head(&self->open_wait); - init_waitqueue_head(&self->close_wait); spin_lock_init(&self->spinlock); /* @@ -408,6 +407,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) tty->termios->c_oflag = 0; /* Insert into hash */ + /* FIXME there is a window from find to here */ hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL); } /* ++ is not atomic, so this should be protected - Jean II */ @@ -438,7 +438,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * probably better sleep uninterruptible? */ - if (wait_event_interruptible(self->close_wait, !test_bit(ASYNC_B_CLOSING, &self->flags))) { + if (wait_event_interruptible(self->port.close_wait, + !test_bit(ASYNC_B_CLOSING, &self->flags))) { IRDA_WARNING("%s - got signal while blocking on ASYNC_CLOSING!\n", __func__); return -ERESTARTSYS; @@ -559,11 +560,11 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) if (self->blocked_open) { if (self->close_delay) schedule_timeout_interruptible(self->close_delay); - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } self->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&self->close_wait); + wake_up_interruptible(&self->port.close_wait); } /* @@ -1011,7 +1012,7 @@ static void ircomm_tty_hangup(struct tty_struct *tty) self->open_count = 0; spin_unlock_irqrestore(&self->spinlock, flags); - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } /* @@ -1084,7 +1085,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) (status & IRCOMM_CD) ? "on" : "off"); if (status & IRCOMM_CD) { - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } else { IRDA_DEBUG(2, "%s(), Doing serial hangup..\n", __func__ ); @@ -1103,7 +1104,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) tty->hw_stopped = 0; /* Wake up processes blocked on open */ - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); schedule_work(&self->tqueue); return; diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index b65d66e0d817..bb1e9356bb18 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -575,7 +575,7 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) self->tty->hw_stopped = 0; /* Wake up processes blocked on open */ - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } schedule_work(&self->tqueue); -- cgit v1.2.1 From 2a0213cb1e1ca6b2838595b0d70f09ecc4953ba9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:17 +0200 Subject: TTY: ircomm, use close times from tty_port Switch to tty_port->close_delay and closing_wait. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 10 ++++------ net/irda/ircomm/ircomm_tty_ioctl.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 8eeaa8b7f4a6..61e0adcab964 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -389,8 +389,6 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) INIT_WORK(&self->tqueue, ircomm_tty_do_softint); self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED; self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED; - self->close_delay = 5*HZ/10; - self->closing_wait = 30*HZ; /* Init some important stuff */ init_timer(&self->watchdog_timer); @@ -546,8 +544,8 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) * the line discipline to only process XON/XOFF characters. */ tty->closing = 1; - if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent_from_close(tty, self->closing_wait); + if (self->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) + tty_wait_until_sent_from_close(tty, self->port.closing_wait); ircomm_tty_shutdown(self); @@ -558,8 +556,8 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) self->tty = NULL; if (self->blocked_open) { - if (self->close_delay) - schedule_timeout_interruptible(self->close_delay); + if (self->port.close_delay) + schedule_timeout_interruptible(self->port.close_delay); wake_up_interruptible(&self->port.open_wait); } diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index d0667d68351d..a6d25e37b6b2 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -272,8 +272,8 @@ static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self, info.line = self->line; info.flags = self->flags; info.baud_base = self->settings.data_rate; - info.close_delay = self->close_delay; - info.closing_wait = self->closing_wait; + info.close_delay = self->port.close_delay; + info.closing_wait = self->port.closing_wait; /* For compatibility */ info.type = PORT_16550A; -- cgit v1.2.1 From 580d27b449cb8f540bba1cc54066bb44f4e6242d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:18 +0200 Subject: TTY: ircomm, use open counts from tty_port Switch to tty_port->count and blocked_open. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 61e0adcab964..787578f9f312 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -272,7 +272,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, self->open_count is dropped by one, so that + * this loop, self->port.count is dropped by one, so that * mgsl_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ @@ -281,16 +281,16 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, add_wait_queue(&self->port.open_wait, &wait); IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count ); + __FILE__, __LINE__, tty->driver->name, self->port.count); - /* As far as I can see, we protect open_count - Jean II */ + /* As far as I can see, we protect port.count - Jean II */ spin_lock_irqsave(&self->spinlock, flags); if (!tty_hung_up_p(filp)) { extra_count = 1; - self->open_count--; + self->port.count--; } spin_unlock_irqrestore(&self->spinlock, flags); - self->blocked_open++; + self->port.blocked_open++; while (1) { if (tty->termios->c_cflag & CBAUD) { @@ -330,7 +330,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, } IRDA_DEBUG(1, "%s(%d):block_til_ready blocking on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count ); + __FILE__, __LINE__, tty->driver->name, self->port.count); schedule(); } @@ -341,13 +341,13 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, if (extra_count) { /* ++ is not atomic, so this should be protected - Jean II */ spin_lock_irqsave(&self->spinlock, flags); - self->open_count++; + self->port.count++; spin_unlock_irqrestore(&self->spinlock, flags); } - self->blocked_open--; + self->port.blocked_open--; IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count); + __FILE__, __LINE__, tty->driver->name, self->port.count); if (!retval) self->flags |= ASYNC_NORMAL_ACTIVE; @@ -410,14 +410,14 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) } /* ++ is not atomic, so this should be protected - Jean II */ spin_lock_irqsave(&self->spinlock, flags); - self->open_count++; + self->port.count++; tty->driver_data = self; self->tty = tty; spin_unlock_irqrestore(&self->spinlock, flags); IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __func__ , tty->driver->name, - self->line, self->open_count); + self->line, self->port.count); /* Not really used by us, but lets do it anyway */ self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0; @@ -504,7 +504,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) return; } - if ((tty->count == 1) && (self->open_count != 1)) { + if ((tty->count == 1) && (self->port.count != 1)) { /* * Uh, oh. tty->count is 1, which means that the tty * structure will be freed. state->count should always @@ -514,16 +514,16 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) */ IRDA_DEBUG(0, "%s(), bad serial port count; " "tty->count is 1, state->count is %d\n", __func__ , - self->open_count); - self->open_count = 1; + self->port.count); + self->port.count = 1; } - if (--self->open_count < 0) { + if (--self->port.count < 0) { IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n", - __func__, self->line, self->open_count); - self->open_count = 0; + __func__, self->line, self->port.count); + self->port.count = 0; } - if (self->open_count) { + if (self->port.count) { spin_unlock_irqrestore(&self->spinlock, flags); IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ ); @@ -555,7 +555,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) tty->closing = 0; self->tty = NULL; - if (self->blocked_open) { + if (self->port.blocked_open) { if (self->port.close_delay) schedule_timeout_interruptible(self->port.close_delay); wake_up_interruptible(&self->port.open_wait); @@ -1007,7 +1007,7 @@ static void ircomm_tty_hangup(struct tty_struct *tty) spin_lock_irqsave(&self->spinlock, flags); self->flags &= ~ASYNC_NORMAL_ACTIVE; self->tty = NULL; - self->open_count = 0; + self->port.count = 0; spin_unlock_irqrestore(&self->spinlock, flags); wake_up_interruptible(&self->port.open_wait); @@ -1354,7 +1354,7 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) seq_putc(m, '\n'); seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); - seq_printf(m, "Open count: %d\n", self->open_count); + seq_printf(m, "Open count: %d\n", self->port.count); seq_printf(m, "Max data size: %d\n", self->max_data_size); seq_printf(m, "Max header size: %d\n", self->max_header_size); -- cgit v1.2.1 From 849d5a997fe6a9e44401daed62a98121390ec0d3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:19 +0200 Subject: TTY: ircomm, use flags from tty_port Switch to tty_port->flags. And while at it, remove redefined flags for them. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 46 ++++++++++++++++++------------------- net/irda/ircomm/ircomm_tty_attach.c | 5 ++-- net/irda/ircomm/ircomm_tty_ioctl.c | 10 ++++---- 3 files changed, 31 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 787578f9f312..8e61026b9dd4 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -194,7 +194,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self) IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); /* Check if already open */ - if (test_and_set_bit(ASYNC_B_INITIALIZED, &self->flags)) { + if (test_and_set_bit(ASYNCB_INITIALIZED, &self->port.flags)) { IRDA_DEBUG(2, "%s(), already open so break out!\n", __func__ ); return 0; } @@ -231,7 +231,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self) return 0; err: - clear_bit(ASYNC_B_INITIALIZED, &self->flags); + clear_bit(ASYNCB_INITIALIZED, &self->port.flags); return ret; } @@ -260,7 +260,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, */ if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ - self->flags |= ASYNC_NORMAL_ACTIVE; + self->port.flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; } @@ -306,8 +306,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, current->state = TASK_INTERRUPTIBLE; if (tty_hung_up_p(filp) || - !test_bit(ASYNC_B_INITIALIZED, &self->flags)) { - retval = (self->flags & ASYNC_HUP_NOTIFY) ? + !test_bit(ASYNCB_INITIALIZED, &self->port.flags)) { + retval = (self->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } @@ -317,7 +317,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * specified, we cannot return before the IrCOMM link is * ready */ - if (!test_bit(ASYNC_B_CLOSING, &self->flags) && + if (!test_bit(ASYNCB_CLOSING, &self->port.flags) && (do_clocal || (self->settings.dce & IRCOMM_CD)) && self->state == IRCOMM_TTY_READY) { @@ -350,7 +350,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __FILE__, __LINE__, tty->driver->name, self->port.count); if (!retval) - self->flags |= ASYNC_NORMAL_ACTIVE; + self->port.flags |= ASYNC_NORMAL_ACTIVE; return retval; } @@ -420,13 +420,13 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self->line, self->port.count); /* Not really used by us, but lets do it anyway */ - self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; /* * If the port is the middle of closing, bail out now */ if (tty_hung_up_p(filp) || - test_bit(ASYNC_B_CLOSING, &self->flags)) { + test_bit(ASYNCB_CLOSING, &self->port.flags)) { /* Hm, why are we blocking on ASYNC_CLOSING if we * do return -EAGAIN/-ERESTARTSYS below anyway? @@ -437,14 +437,14 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) */ if (wait_event_interruptible(self->port.close_wait, - !test_bit(ASYNC_B_CLOSING, &self->flags))) { + !test_bit(ASYNCB_CLOSING, &self->port.flags))) { IRDA_WARNING("%s - got signal while blocking on ASYNC_CLOSING!\n", __func__); return -ERESTARTSYS; } #ifdef SERIAL_DO_RESTART - return (self->flags & ASYNC_HUP_NOTIFY) ? + return (self->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; #else return -EAGAIN; @@ -531,7 +531,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) } /* Hum... Should be test_and_set_bit ??? - Jean II */ - set_bit(ASYNC_B_CLOSING, &self->flags); + set_bit(ASYNCB_CLOSING, &self->port.flags); /* We need to unlock here (we were unlocking at the end of this * function), because tty_wait_until_sent() may schedule. @@ -561,7 +561,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) wake_up_interruptible(&self->port.open_wait); } - self->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + self->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); wake_up_interruptible(&self->port.close_wait); } @@ -954,7 +954,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self) IRDA_DEBUG(0, "%s()\n", __func__ ); - if (!test_and_clear_bit(ASYNC_B_INITIALIZED, &self->flags)) + if (!test_and_clear_bit(ASYNCB_INITIALIZED, &self->port.flags)) return; ircomm_tty_detach_cable(self); @@ -1005,7 +1005,7 @@ static void ircomm_tty_hangup(struct tty_struct *tty) /* I guess we need to lock here - Jean II */ spin_lock_irqsave(&self->spinlock, flags); - self->flags &= ~ASYNC_NORMAL_ACTIVE; + self->port.flags &= ~ASYNC_NORMAL_ACTIVE; self->tty = NULL; self->port.count = 0; spin_unlock_irqrestore(&self->spinlock, flags); @@ -1077,7 +1077,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) if (status & IRCOMM_DCE_DELTA_ANY) { /*wake_up_interruptible(&self->delta_msr_wait);*/ } - if ((self->flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) { + if ((self->port.flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) { IRDA_DEBUG(2, "%s(), ircomm%d CD now %s...\n", __func__ , self->line, (status & IRCOMM_CD) ? "on" : "off"); @@ -1094,7 +1094,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) return; } } - if (self->flags & ASYNC_CTS_FLOW) { + if (self->port.flags & ASYNC_CTS_FLOW) { if (tty->hw_stopped) { if (status & IRCOMM_CTS) { IRDA_DEBUG(2, @@ -1327,27 +1327,27 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) seq_puts(m, "Flags:"); sep = ' '; - if (self->flags & ASYNC_CTS_FLOW) { + if (self->port.flags & ASYNC_CTS_FLOW) { seq_printf(m, "%cASYNC_CTS_FLOW", sep); sep = '|'; } - if (self->flags & ASYNC_CHECK_CD) { + if (self->port.flags & ASYNC_CHECK_CD) { seq_printf(m, "%cASYNC_CHECK_CD", sep); sep = '|'; } - if (self->flags & ASYNC_INITIALIZED) { + if (self->port.flags & ASYNC_INITIALIZED) { seq_printf(m, "%cASYNC_INITIALIZED", sep); sep = '|'; } - if (self->flags & ASYNC_LOW_LATENCY) { + if (self->port.flags & ASYNC_LOW_LATENCY) { seq_printf(m, "%cASYNC_LOW_LATENCY", sep); sep = '|'; } - if (self->flags & ASYNC_CLOSING) { + if (self->port.flags & ASYNC_CLOSING) { seq_printf(m, "%cASYNC_CLOSING", sep); sep = '|'; } - if (self->flags & ASYNC_NORMAL_ACTIVE) { + if (self->port.flags & ASYNC_NORMAL_ACTIVE) { seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); sep = '|'; } diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index bb1e9356bb18..bed311af7311 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -566,7 +566,8 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) * will have to wait for the peer device (DCE) to raise the CTS * line. */ - if ((self->flags & ASYNC_CTS_FLOW) && ((self->settings.dce & IRCOMM_CTS) == 0)) { + if ((self->port.flags & ASYNC_CTS_FLOW) && + ((self->settings.dce & IRCOMM_CTS) == 0)) { IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __func__ ); return; } else { @@ -977,7 +978,7 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); ircomm_tty_start_watchdog_timer(self, 3*HZ); - if (self->flags & ASYNC_CHECK_CD) { + if (self->port.flags & ASYNC_CHECK_CD) { /* Drop carrier */ self->settings.dce = IRCOMM_DELTA_CD; ircomm_tty_check_modem_status(self); diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index a6d25e37b6b2..31b917e9c8d8 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -90,19 +90,19 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) /* CTS flow control flag and modem status interrupts */ if (cflag & CRTSCTS) { - self->flags |= ASYNC_CTS_FLOW; + self->port.flags |= ASYNC_CTS_FLOW; self->settings.flow_control |= IRCOMM_RTS_CTS_IN; /* This got me. Bummer. Jean II */ if (self->service_type == IRCOMM_3_WIRE_RAW) IRDA_WARNING("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n", __func__); } else { - self->flags &= ~ASYNC_CTS_FLOW; + self->port.flags &= ~ASYNC_CTS_FLOW; self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN; } if (cflag & CLOCAL) - self->flags &= ~ASYNC_CHECK_CD; + self->port.flags &= ~ASYNC_CHECK_CD; else - self->flags |= ASYNC_CHECK_CD; + self->port.flags |= ASYNC_CHECK_CD; #if 0 /* * Set up parity check flag @@ -270,7 +270,7 @@ static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self, memset(&info, 0, sizeof(info)); info.line = self->line; - info.flags = self->flags; + info.flags = self->port.flags; info.baud_base = self->settings.data_rate; info.close_delay = self->port.close_delay; info.closing_wait = self->port.closing_wait; -- cgit v1.2.1 From e673927d8a210ab1db27047080fc1bdb47f7e372 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:20 +0200 Subject: TTY: ircomm, revamp locking Use self->spinlock only for ctrl_skb and tx_skb. TTY stuff is now protected by tty_port->lock. This is needed for further cleanup (and conversion to tty_port helpers). This also closes the race in the end of close. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 8e61026b9dd4..7b2152cfd42a 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -283,13 +283,12 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n", __FILE__, __LINE__, tty->driver->name, self->port.count); - /* As far as I can see, we protect port.count - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&self->port.lock, flags); if (!tty_hung_up_p(filp)) { extra_count = 1; self->port.count--; } - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); self->port.blocked_open++; while (1) { @@ -340,9 +339,9 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, if (extra_count) { /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&self->port.lock, flags); self->port.count++; - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); } self->port.blocked_open--; @@ -409,12 +408,12 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL); } /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&self->port.lock, flags); self->port.count++; tty->driver_data = self; self->tty = tty; - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __func__ , tty->driver->name, self->line, self->port.count); @@ -495,10 +494,10 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&self->port.lock, flags); if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); IRDA_DEBUG(0, "%s(), returning 1\n", __func__ ); return; @@ -524,20 +523,15 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) self->port.count = 0; } if (self->port.count) { - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ ); return; } - /* Hum... Should be test_and_set_bit ??? - Jean II */ set_bit(ASYNCB_CLOSING, &self->port.flags); - /* We need to unlock here (we were unlocking at the end of this - * function), because tty_wait_until_sent() may schedule. - * I don't know if the rest should be protected somehow, - * so someone should check. - Jean II */ - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); /* * Now we wait for the transmit buffer to clear; and we notify @@ -552,16 +546,21 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) tty_driver_flush_buffer(tty); tty_ldisc_flush(tty); + spin_lock_irqsave(&self->port.lock, flags); tty->closing = 0; self->tty = NULL; if (self->port.blocked_open) { - if (self->port.close_delay) + if (self->port.close_delay) { + spin_unlock_irqrestore(&self->port.lock, flags); schedule_timeout_interruptible(self->port.close_delay); + spin_lock_irqsave(&self->port.lock, flags); + } wake_up_interruptible(&self->port.open_wait); } self->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + spin_unlock_irqrestore(&self->port.lock, flags); wake_up_interruptible(&self->port.close_wait); } @@ -1003,12 +1002,11 @@ static void ircomm_tty_hangup(struct tty_struct *tty) /* ircomm_tty_flush_buffer(tty); */ ircomm_tty_shutdown(self); - /* I guess we need to lock here - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&self->port.lock, flags); self->port.flags &= ~ASYNC_NORMAL_ACTIVE; self->tty = NULL; self->port.count = 0; - spin_unlock_irqrestore(&self->spinlock, flags); + spin_unlock_irqrestore(&self->port.lock, flags); wake_up_interruptible(&self->port.open_wait); } -- cgit v1.2.1 From 62f228acb807c370c3b1583bf0f1aa4ab8e19aca Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:21 +0200 Subject: TTY: ircomm, use tty from tty_port This also includes a switch to tty refcounting. It makes sure, the code no longer can access a freed TTY struct. Sometimes the only thing needed is to pass tty down to the callies. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_param.c | 5 --- net/irda/ircomm/ircomm_tty.c | 62 +++++++++++++++++++++++-------------- net/irda/ircomm/ircomm_tty_attach.c | 33 +++++++++++++++----- net/irda/ircomm/ircomm_tty_ioctl.c | 11 ++++--- 4 files changed, 70 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 8b915f3ac3b9..308939128359 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -99,7 +99,6 @@ pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 }; */ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) { - struct tty_struct *tty; unsigned long flags; struct sk_buff *skb; int count; @@ -109,10 +108,6 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - tty = self->tty; - if (!tty) - return 0; - /* Make sure we don't send parameters for raw mode */ if (self->service_type == IRCOMM_3_WIRE_RAW) return 0; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 7b2152cfd42a..03acc073b8c3 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -242,18 +242,15 @@ err: * */ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, - struct file *filp) + struct tty_struct *tty, struct file *filp) { DECLARE_WAITQUEUE(wait, current); int retval; int do_clocal = 0, extra_count = 0; unsigned long flags; - struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); - tty = self->tty; - /* * If non-blocking mode is set, or the port is not enabled, * then make the check up front and then exit. @@ -412,8 +409,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self->port.count++; tty->driver_data = self; - self->tty = tty; spin_unlock_irqrestore(&self->port.lock, flags); + tty_port_tty_set(&self->port, tty); IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __func__ , tty->driver->name, self->line, self->port.count); @@ -467,7 +464,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) if (ret) return ret; - ret = ircomm_tty_block_til_ready(self, filp); + ret = ircomm_tty_block_til_ready(self, tty, filp); if (ret) { IRDA_DEBUG(2, "%s(), returning after block_til_ready with %d\n", __func__ , @@ -548,7 +545,6 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) spin_lock_irqsave(&self->port.lock, flags); tty->closing = 0; - self->tty = NULL; if (self->port.blocked_open) { if (self->port.close_delay) { @@ -562,6 +558,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) self->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); spin_unlock_irqrestore(&self->port.lock, flags); wake_up_interruptible(&self->port.close_wait); + tty_port_tty_set(&self->port, NULL); } /* @@ -604,7 +601,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) if (!self || self->magic != IRCOMM_TTY_MAGIC) return; - tty = self->tty; + tty = tty_port_tty_get(&self->port); if (!tty) return; @@ -625,7 +622,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) } if (tty->hw_stopped) - return; + goto put; /* Unlink transmit buffer */ spin_lock_irqsave(&self->spinlock, flags); @@ -644,6 +641,8 @@ static void ircomm_tty_do_softint(struct work_struct *work) /* Check if user (still) wants to be waken up */ tty_wakeup(tty); +put: + tty_kref_put(tty); } /* @@ -1004,7 +1003,11 @@ static void ircomm_tty_hangup(struct tty_struct *tty) spin_lock_irqsave(&self->port.lock, flags); self->port.flags &= ~ASYNC_NORMAL_ACTIVE; - self->tty = NULL; + if (self->port.tty) { + set_bit(TTY_IO_ERROR, &self->port.tty->flags); + tty_kref_put(self->port.tty); + } + self->port.tty = NULL; self->port.count = 0; spin_unlock_irqrestore(&self->port.lock, flags); @@ -1068,7 +1071,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - tty = self->tty; + tty = tty_port_tty_get(&self->port); status = self->settings.dce; @@ -1089,10 +1092,10 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) tty_hangup(tty); /* Hangup will remote the tty, so better break out */ - return; + goto put; } } - if (self->port.flags & ASYNC_CTS_FLOW) { + if (tty && self->port.flags & ASYNC_CTS_FLOW) { if (tty->hw_stopped) { if (status & IRCOMM_CTS) { IRDA_DEBUG(2, @@ -1103,7 +1106,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) wake_up_interruptible(&self->port.open_wait); schedule_work(&self->tqueue); - return; + goto put; } } else { if (!(status & IRCOMM_CTS)) { @@ -1113,6 +1116,8 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) } } } +put: + tty_kref_put(tty); } /* @@ -1125,6 +1130,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -1132,7 +1138,8 @@ static int ircomm_tty_data_indication(void *instance, void *sap, IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); IRDA_ASSERT(skb != NULL, return -1;); - if (!self->tty) { + tty = tty_port_tty_get(&self->port); + if (!tty) { IRDA_DEBUG(0, "%s(), no tty!\n", __func__ ); return 0; } @@ -1143,7 +1150,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * Devices like WinCE can do this, and since they don't send any * params, we can just as well declare the hardware for running. */ - if (self->tty->hw_stopped && (self->flow == FLOW_START)) { + if (tty->hw_stopped && (self->flow == FLOW_START)) { IRDA_DEBUG(0, "%s(), polling for line settings!\n", __func__ ); ircomm_param_request(self, IRCOMM_POLL, TRUE); @@ -1156,8 +1163,9 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * Use flip buffer functions since the code may be called from interrupt * context */ - tty_insert_flip_string(self->tty, skb->data, skb->len); - tty_flip_buffer_push(self->tty); + tty_insert_flip_string(tty, skb->data, skb->len); + tty_flip_buffer_push(tty); + tty_kref_put(tty); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ @@ -1208,12 +1216,13 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - tty = self->tty; + tty = tty_port_tty_get(&self->port); switch (cmd) { case FLOW_START: IRDA_DEBUG(2, "%s(), hw start!\n", __func__ ); - tty->hw_stopped = 0; + if (tty) + tty->hw_stopped = 0; /* ircomm_tty_do_softint will take care of the rest */ schedule_work(&self->tqueue); @@ -1221,15 +1230,19 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, default: /* If we get here, something is very wrong, better stop */ case FLOW_STOP: IRDA_DEBUG(2, "%s(), hw stopped!\n", __func__ ); - tty->hw_stopped = 1; + if (tty) + tty->hw_stopped = 1; break; } + + tty_kref_put(tty); self->flow = cmd; } #ifdef CONFIG_PROC_FS static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) { + struct tty_struct *tty; char sep; seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); @@ -1356,9 +1369,12 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) seq_printf(m, "Max data size: %d\n", self->max_data_size); seq_printf(m, "Max header size: %d\n", self->max_header_size); - if (self->tty) + tty = tty_port_tty_get(&self->port); + if (tty) { seq_printf(m, "Hardware: %s\n", - self->tty->hw_stopped ? "Stopped" : "Running"); + tty->hw_stopped ? "Stopped" : "Running"); + tty_kref_put(tty); + } } static int ircomm_tty_proc_show(struct seq_file *m, void *v) diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index bed311af7311..3ab70e7a0715 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -130,6 +130,8 @@ static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, */ int ircomm_tty_attach_cable(struct ircomm_tty_cb *self) { + struct tty_struct *tty; + IRDA_DEBUG(0, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return -1;); @@ -142,7 +144,11 @@ int ircomm_tty_attach_cable(struct ircomm_tty_cb *self) } /* Make sure nobody tries to write before the link is up */ - self->tty->hw_stopped = 1; + tty = tty_port_tty_get(&self->port); + if (tty) { + tty->hw_stopped = 1; + tty_kref_put(tty); + } ircomm_tty_ias_register(self); @@ -398,23 +404,26 @@ void ircomm_tty_disconnect_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - if (!self->tty) + tty = tty_port_tty_get(&self->port); + if (!tty) return; /* This will stop control data transfers */ self->flow = FLOW_STOP; /* Stop data transfers */ - self->tty->hw_stopped = 1; + tty->hw_stopped = 1; ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL, NULL); + tty_kref_put(tty); } /* @@ -550,12 +559,15 @@ void ircomm_tty_connect_indication(void *instance, void *sap, */ void ircomm_tty_link_established(struct ircomm_tty_cb *self) { + struct tty_struct *tty; + IRDA_DEBUG(2, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - if (!self->tty) + tty = tty_port_tty_get(&self->port); + if (!tty) return; del_timer(&self->watchdog_timer); @@ -569,17 +581,19 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) if ((self->port.flags & ASYNC_CTS_FLOW) && ((self->settings.dce & IRCOMM_CTS) == 0)) { IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __func__ ); - return; + goto put; } else { IRDA_DEBUG(1, "%s(), starting hardware!\n", __func__ ); - self->tty->hw_stopped = 0; + tty->hw_stopped = 0; /* Wake up processes blocked on open */ wake_up_interruptible(&self->port.open_wait); } schedule_work(&self->tqueue); +put: + tty_kref_put(tty); } /* @@ -983,9 +997,12 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, self->settings.dce = IRCOMM_DELTA_CD; ircomm_tty_check_modem_status(self); } else { + struct tty_struct *tty = tty_port_tty_get(&self->port); IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ ); - if (self->tty) - tty_hangup(self->tty); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } } break; default: diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 31b917e9c8d8..0eab6500e99f 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -52,17 +52,18 @@ * Change speed of the driver. If the remote device is a DCE, then this * should make it change the speed of its serial port */ -static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) +static void ircomm_tty_change_speed(struct ircomm_tty_cb *self, + struct tty_struct *tty) { unsigned int cflag, cval; int baud; IRDA_DEBUG(2, "%s()\n", __func__ ); - if (!self->tty || !self->tty->termios || !self->ircomm) + if (!self->ircomm) return; - cflag = self->tty->termios->c_cflag; + cflag = tty->termios->c_cflag; /* byte size and parity */ switch (cflag & CSIZE) { @@ -81,7 +82,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) cval |= IRCOMM_PARITY_EVEN; /* Determine divisor based on baud rate */ - baud = tty_get_baud_rate(self->tty); + baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; /* B0 transition handled in rs_set_termios */ @@ -159,7 +160,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty, return; } - ircomm_tty_change_speed(self); + ircomm_tty_change_speed(self, tty); /* Handle transition to B0 status */ if ((old_termios->c_cflag & CBAUD) && -- cgit v1.2.1 From 38c58032f07be4dee764baa573b233e9a828c119 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:22 +0200 Subject: TTY: ircomm, define local tty_port In some functions we use tty_port heavily. So let us have a local pointer to that variable instead of having self->port all over the code. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 109 ++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 03acc073b8c3..199d9cbf9948 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -244,6 +244,7 @@ err: static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, struct tty_struct *tty, struct file *filp) { + struct tty_port *port = &self->port; DECLARE_WAITQUEUE(wait, current); int retval; int do_clocal = 0, extra_count = 0; @@ -257,7 +258,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, */ if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ - self->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; } @@ -269,24 +270,24 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, self->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * mgsl_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&self->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, self->port.count); + __FILE__, __LINE__, tty->driver->name, port->count); - spin_lock_irqsave(&self->port.lock, flags); + spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) { extra_count = 1; - self->port.count--; + port->count--; } - spin_unlock_irqrestore(&self->port.lock, flags); - self->port.blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); + port->blocked_open++; while (1) { if (tty->termios->c_cflag & CBAUD) { @@ -302,8 +303,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, current->state = TASK_INTERRUPTIBLE; if (tty_hung_up_p(filp) || - !test_bit(ASYNCB_INITIALIZED, &self->port.flags)) { - retval = (self->port.flags & ASYNC_HUP_NOTIFY) ? + !test_bit(ASYNCB_INITIALIZED, &port->flags)) { + retval = (port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } @@ -313,7 +314,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * specified, we cannot return before the IrCOMM link is * ready */ - if (!test_bit(ASYNCB_CLOSING, &self->port.flags) && + if (!test_bit(ASYNCB_CLOSING, &port->flags) && (do_clocal || (self->settings.dce & IRCOMM_CD)) && self->state == IRCOMM_TTY_READY) { @@ -326,27 +327,27 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, } IRDA_DEBUG(1, "%s(%d):block_til_ready blocking on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, self->port.count); + __FILE__, __LINE__, tty->driver->name, port->count); schedule(); } __set_current_state(TASK_RUNNING); - remove_wait_queue(&self->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); if (extra_count) { /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->port.lock, flags); - self->port.count++; - spin_unlock_irqrestore(&self->port.lock, flags); + spin_lock_irqsave(&port->lock, flags); + port->count++; + spin_unlock_irqrestore(&port->lock, flags); } - self->port.blocked_open--; + port->blocked_open--; IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, self->port.count); + __FILE__, __LINE__, tty->driver->name, port->count); if (!retval) - self->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return retval; } @@ -484,6 +485,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; + struct tty_port *port = &self->port; unsigned long flags; IRDA_DEBUG(0, "%s()\n", __func__ ); @@ -491,16 +493,16 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - spin_lock_irqsave(&self->port.lock, flags); + spin_lock_irqsave(&port->lock, flags); if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&self->port.lock, flags); + spin_unlock_irqrestore(&port->lock, flags); IRDA_DEBUG(0, "%s(), returning 1\n", __func__ ); return; } - if ((tty->count == 1) && (self->port.count != 1)) { + if ((tty->count == 1) && (port->count != 1)) { /* * Uh, oh. tty->count is 1, which means that the tty * structure will be freed. state->count should always @@ -510,55 +512,55 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) */ IRDA_DEBUG(0, "%s(), bad serial port count; " "tty->count is 1, state->count is %d\n", __func__ , - self->port.count); - self->port.count = 1; + port->count); + port->count = 1; } - if (--self->port.count < 0) { + if (--port->count < 0) { IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n", - __func__, self->line, self->port.count); - self->port.count = 0; + __func__, self->line, port->count); + port->count = 0; } - if (self->port.count) { - spin_unlock_irqrestore(&self->port.lock, flags); + if (port->count) { + spin_unlock_irqrestore(&port->lock, flags); IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ ); return; } - set_bit(ASYNCB_CLOSING, &self->port.flags); + set_bit(ASYNCB_CLOSING, &port->flags); - spin_unlock_irqrestore(&self->port.lock, flags); + spin_unlock_irqrestore(&port->lock, flags); /* * Now we wait for the transmit buffer to clear; and we notify * the line discipline to only process XON/XOFF characters. */ tty->closing = 1; - if (self->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent_from_close(tty, self->port.closing_wait); + if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) + tty_wait_until_sent_from_close(tty, port->closing_wait); ircomm_tty_shutdown(self); tty_driver_flush_buffer(tty); tty_ldisc_flush(tty); - spin_lock_irqsave(&self->port.lock, flags); + spin_lock_irqsave(&port->lock, flags); tty->closing = 0; - if (self->port.blocked_open) { - if (self->port.close_delay) { - spin_unlock_irqrestore(&self->port.lock, flags); - schedule_timeout_interruptible(self->port.close_delay); - spin_lock_irqsave(&self->port.lock, flags); + if (port->blocked_open) { + if (port->close_delay) { + spin_unlock_irqrestore(&port->lock, flags); + schedule_timeout_interruptible(port->close_delay); + spin_lock_irqsave(&port->lock, flags); } - wake_up_interruptible(&self->port.open_wait); + wake_up_interruptible(&port->open_wait); } - self->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - spin_unlock_irqrestore(&self->port.lock, flags); - wake_up_interruptible(&self->port.close_wait); - tty_port_tty_set(&self->port, NULL); + port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + spin_unlock_irqrestore(&port->lock, flags); + wake_up_interruptible(&port->close_wait); + tty_port_tty_set(port, NULL); } /* @@ -991,6 +993,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self) static void ircomm_tty_hangup(struct tty_struct *tty) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; + struct tty_port *port = &self->port; unsigned long flags; IRDA_DEBUG(0, "%s()\n", __func__ ); @@ -1001,17 +1004,17 @@ static void ircomm_tty_hangup(struct tty_struct *tty) /* ircomm_tty_flush_buffer(tty); */ ircomm_tty_shutdown(self); - spin_lock_irqsave(&self->port.lock, flags); - self->port.flags &= ~ASYNC_NORMAL_ACTIVE; - if (self->port.tty) { - set_bit(TTY_IO_ERROR, &self->port.tty->flags); - tty_kref_put(self->port.tty); + spin_lock_irqsave(&port->lock, flags); + port->flags &= ~ASYNC_NORMAL_ACTIVE; + if (port->tty) { + set_bit(TTY_IO_ERROR, &port->tty->flags); + tty_kref_put(port->tty); } - self->port.tty = NULL; - self->port.count = 0; - spin_unlock_irqrestore(&self->port.lock, flags); + port->tty = NULL; + port->count = 0; + spin_unlock_irqrestore(&port->lock, flags); - wake_up_interruptible(&self->port.open_wait); + wake_up_interruptible(&port->open_wait); } /* -- cgit v1.2.1 From 0ba9ff846b2f4720e30ace37b028ef14fb97fb74 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:23 +0200 Subject: TTY: ircomm, define carrier routines These will be used by the tty_port wait_til_ready later. (Now they are used by our code.) Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 199d9cbf9948..3fdce18c6931 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -104,6 +104,35 @@ static const struct tty_operations ops = { #endif /* CONFIG_PROC_FS */ }; +static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise) +{ + struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, + port); + /* + * Here, we use to lock those two guys, but as ircomm_param_request() + * does it itself, I don't see the point (and I see the deadlock). + * Jean II + */ + if (raise) + self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR; + else + self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR); + + ircomm_param_request(self, IRCOMM_DTE, TRUE); +} + +static int ircomm_port_carrier_raised(struct tty_port *port) +{ + struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, + port); + return self->settings.dce & IRCOMM_CD; +} + +static const struct tty_port_operations ircomm_port_ops = { + .dtr_rts = ircomm_port_raise_dtr_rts, + .carrier_raised = ircomm_port_carrier_raised, +}; + /* * Function ircomm_tty_init() * @@ -290,15 +319,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, port->blocked_open++; while (1) { - if (tty->termios->c_cflag & CBAUD) { - /* Here, we use to lock those two guys, but - * as ircomm_param_request() does it itself, - * I don't see the point (and I see the deadlock). - * Jean II */ - self->settings.dte |= IRCOMM_RTS + IRCOMM_DTR; - - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } + if (tty->termios->c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); current->state = TASK_INTERRUPTIBLE; @@ -315,7 +337,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * ready */ if (!test_bit(ASYNCB_CLOSING, &port->flags) && - (do_clocal || (self->settings.dce & IRCOMM_CD)) && + (do_clocal || tty_port_carrier_raised(port)) && self->state == IRCOMM_TTY_READY) { break; @@ -379,6 +401,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) } tty_port_init(&self->port); + self->port.ops = &ircomm_port_ops; self->magic = IRCOMM_TTY_MAGIC; self->flow = FLOW_STOP; -- cgit v1.2.1 From a1e844036af9cb0d87e878a4f90ce64713c76e5a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:24 +0200 Subject: TTY: ircomm, use tty_port_close_end helper Again, the code is identical, so leverage the helper code. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 3fdce18c6931..cfe352dfb484 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -568,21 +568,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) tty_driver_flush_buffer(tty); tty_ldisc_flush(tty); - spin_lock_irqsave(&port->lock, flags); - tty->closing = 0; - - if (port->blocked_open) { - if (port->close_delay) { - spin_unlock_irqrestore(&port->lock, flags); - schedule_timeout_interruptible(port->close_delay); - spin_lock_irqsave(&port->lock, flags); - } - wake_up_interruptible(&port->open_wait); - } - - port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - spin_unlock_irqrestore(&port->lock, flags); - wake_up_interruptible(&port->close_wait); + tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); } -- cgit v1.2.1 From c0e7865003ae9929f32bcb7277f591115fa242b7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Jun 2012 13:35:25 +0200 Subject: TTY: ircomm, use tty_port_close_start helper Again, the code is identical, so leverage the helper code. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: netdev@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 48 +------------------------------------------- 1 file changed, 1 insertion(+), 47 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index cfe352dfb484..4e35b45c1c73 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -509,64 +509,18 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; struct tty_port *port = &self->port; - unsigned long flags; IRDA_DEBUG(0, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - spin_lock_irqsave(&port->lock, flags); - - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&port->lock, flags); - - IRDA_DEBUG(0, "%s(), returning 1\n", __func__ ); - return; - } - - if ((tty->count == 1) && (port->count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. state->count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - IRDA_DEBUG(0, "%s(), bad serial port count; " - "tty->count is 1, state->count is %d\n", __func__ , - port->count); - port->count = 1; - } - - if (--port->count < 0) { - IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n", - __func__, self->line, port->count); - port->count = 0; - } - if (port->count) { - spin_unlock_irqrestore(&port->lock, flags); - - IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ ); + if (tty_port_close_start(port, tty, filp) == 0) return; - } - - set_bit(ASYNCB_CLOSING, &port->flags); - - spin_unlock_irqrestore(&port->lock, flags); - - /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - tty->closing = 1; - if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent_from_close(tty, port->closing_wait); ircomm_tty_shutdown(self); tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); -- cgit v1.2.1 From f5e3bcc504c3c35cc6e06a9ee42efed7c274066b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 29 Jun 2012 14:48:36 +0100 Subject: tty: localise the lock The termios and other changes mean the other protections needed on the driver tty arrays should be adequate. Turn it all back on. This contains pieces folded in from the fixes made to the original patches | From: Geert Uytterhoeven (fix m68k) | From: Paul Gortmaker (fix cris) | From: Jiri Kosina (lockdep) | From: Eric Dumazet (lockdep) Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d1820ff14aee..aa5d73b786ac 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -710,9 +710,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } - tty_unlock(); + tty_unlock(tty); schedule(); - tty_lock(); + tty_lock(tty); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); -- cgit v1.2.1 From 6d31a88cb2e01d46c0cb74aa5da529e1f92ae3db Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 14 Jul 2012 15:31:27 +0100 Subject: tty: revert incorrectly applied lock patch I sent GregKH this after the pre-requisites. He dropped the pre-requesites for good reason and unfortunately then applied this patch. Without this reverted you get random kernel memory corruption which will make bisecting anything between it and the properly applied patches a complete sod. Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index aa5d73b786ac..d1820ff14aee 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -710,9 +710,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } - tty_unlock(tty); + tty_unlock(); schedule(); - tty_lock(tty); + tty_lock(); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); -- cgit v1.2.1 From adc8d746caa67fff4b53ba3e5163a6cbacc3b523 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 14 Jul 2012 15:31:47 +0100 Subject: tty: move the termios object into the tty This will let us sort out a whole pile of tty related races. The alternative would be to keep points and refcount the termios objects. However 1. They are tiny anyway 2. Many devices don't use the stored copies 3. We can remove a pty special case Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 2 +- net/irda/ircomm/ircomm_tty.c | 12 ++++++------ net/irda/ircomm/ircomm_tty_ioctl.c | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d1820ff14aee..363bca12f00d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -866,7 +866,7 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned l static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { - struct ktermios *new = tty->termios; + struct ktermios *new = &tty->termios; int old_baud_rate = tty_termios_baud_rate(old); int new_baud_rate = tty_termios_baud_rate(new); diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 4e35b45c1c73..7a0d6115d06f 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -292,7 +292,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, return 0; } - if (tty->termios->c_cflag & CLOCAL) { + if (tty->termios.c_cflag & CLOCAL) { IRDA_DEBUG(1, "%s(), doing CLOCAL!\n", __func__ ); do_clocal = 1; } @@ -319,7 +319,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, port->blocked_open++; while (1) { - if (tty->termios->c_cflag & CBAUD) + if (tty->termios.c_cflag & CBAUD) tty_port_raise_dtr_rts(port); current->state = TASK_INTERRUPTIBLE; @@ -421,8 +421,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * * Note this is completely usafe and doesn't work properly */ - tty->termios->c_iflag = 0; - tty->termios->c_oflag = 0; + tty->termios.c_iflag = 0; + tty->termios.c_oflag = 0; /* Insert into hash */ /* FIXME there is a window from find to here */ @@ -842,7 +842,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty) ircomm_tty_send_xchar(tty, STOP_CHAR(tty)); /* Hardware flow control? */ - if (tty->termios->c_cflag & CRTSCTS) { + if (tty->termios.c_cflag & CRTSCTS) { self->settings.dte &= ~IRCOMM_RTS; self->settings.dte |= IRCOMM_DELTA_RTS; @@ -874,7 +874,7 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty) } /* Using hardware flow control? */ - if (tty->termios->c_cflag & CRTSCTS) { + if (tty->termios.c_cflag & CRTSCTS) { self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS); ircomm_param_request(self, IRCOMM_DTE, TRUE); diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 0eab6500e99f..b343f50dc8d7 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -63,7 +63,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self, if (!self->ircomm) return; - cflag = tty->termios->c_cflag; + cflag = tty->termios.c_cflag; /* byte size and parity */ switch (cflag & CSIZE) { @@ -149,12 +149,12 @@ void ircomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned int cflag = tty->termios->c_cflag; + unsigned int cflag = tty->termios.c_cflag; IRDA_DEBUG(2, "%s()\n", __func__ ); if ((cflag == old_termios->c_cflag) && - (RELEVANT_IFLAG(tty->termios->c_iflag) == + (RELEVANT_IFLAG(tty->termios.c_iflag) == RELEVANT_IFLAG(old_termios->c_iflag))) { return; @@ -173,7 +173,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty, if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) { self->settings.dte |= IRCOMM_DTR; - if (!(tty->termios->c_cflag & CRTSCTS) || + if (!(tty->termios.c_cflag & CRTSCTS) || !test_bit(TTY_THROTTLED, &tty->flags)) { self->settings.dte |= IRCOMM_RTS; } @@ -182,7 +182,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty, /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && - !(tty->termios->c_cflag & CRTSCTS)) + !(tty->termios.c_cflag & CRTSCTS)) { tty->hw_stopped = 0; ircomm_tty_start(tty); -- cgit v1.2.1 From bae35d92b6a1b6fd8c699415ab90aeeea2a56bc3 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 24 Jul 2012 11:52:35 +0800 Subject: mac80211: don't re-init rate control when receiving mesh beacon Rate control is re-initialized whenever a beacon from a mesh peer received, breaking the algorithms and resulting in low performance. Return early from mesh_peer_init if we already established a link with this peer to avoid this. Signed-off-by: Chun-Yeow Yeoh [clarify commit message] Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index af671b984df3..fa642c794719 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -362,6 +362,11 @@ static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, spin_lock_bh(&sta->lock); sta->last_rx = jiffies; + if (sta->plink_state == NL80211_PLINK_ESTAB) { + spin_unlock_bh(&sta->lock); + return sta; + } + sta->sta.supp_rates[band] = rates; if (elems->ht_cap_elem && sdata->local->_oper_channel_type != NL80211_CHAN_NO_HT) -- cgit v1.2.1 From d545daba5357c1ca377a4fe917ccf4de3a3031e0 Mon Sep 17 00:00:00 2001 From: Mahesh Palivela Date: Tue, 24 Jul 2012 03:33:10 +0000 Subject: mac80211: VHT (11ac) association Insert VHT IEs into association frames to allow mac80211 to connect as a VHT client. Signed-off-by: Mahesh Palivela [clarify commit message] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bb61f7718c4c..3e2f03b1b50e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -359,6 +359,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_NULLFUNC_ACKED = BIT(8), IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9), IEEE80211_STA_DISABLE_40MHZ = BIT(10), + IEEE80211_STA_DISABLE_VHT = BIT(11), }; struct ieee80211_mgd_auth_data { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cef0c9e79aba..725258c20746 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -326,6 +326,26 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, ieee80211_ie_build_ht_cap(pos, &ht_cap, cap); } +static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_supported_band *sband) +{ + u8 *pos; + u32 cap; + struct ieee80211_sta_vht_cap vht_cap; + + BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); + + memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + + /* determine capability flags */ + cap = vht_cap.cap; + + /* reserve and fill IE */ + pos = skb_put(skb, sizeof(struct ieee80211_vht_capabilities) + 2); + ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); +} + static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -371,6 +391,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 4 + /* power capability */ 2 + 2 * sband->n_channels + /* supported channels */ 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ + 2 + sizeof(struct ieee80211_vht_capabilities) + /* VHT */ assoc_data->ie_len + /* extra IEs */ 9, /* WMM */ GFP_KERNEL); @@ -503,6 +524,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param, sband, local->oper_channel, ifmgd->ap_smps); + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) + ieee80211_add_vht_ie(sdata, skb, sband); + /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { noffset = ieee80211_ie_split_vendor(assoc_data->ie, @@ -3301,6 +3325,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; + ifmgd->flags &= ~IEEE80211_STA_DISABLE_VHT; ifmgd->beacon_crc_valid = false; @@ -3316,13 +3341,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; netdev_info(sdata->dev, - "disabling HT due to WEP/TKIP use\n"); + "disabling HT/VHT due to WEP/TKIP use\n"); } } - if (req->flags & ASSOC_REQ_DISABLE_HT) + if (req->flags & ASSOC_REQ_DISABLE_HT) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; @@ -3333,6 +3361,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, "disabling HT as WMM/QoS is not supported\n"); } + /* disable VHT if we don't support it or the AP doesn't use WMM */ + if (!sband->vht_cap.vht_supported || + local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) { + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + netdev_info(sdata->dev, + "disabling VHT as WMM/QoS is not supported\n"); + } + memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, sizeof(ifmgd->ht_capa_mask)); -- cgit v1.2.1 From 7eeff74c29259e9cb7765e3845c0b74057f744da Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2012 10:27:27 +0200 Subject: mac80211: don't react to beacon loss if HW monitoring If the HW is monitoring connection loss (as advertised by IEEE80211_HW_CONNECTION_MONITOR) but not filtering beacons (IEEE80211_VIF_BEACON_FILTER) then mac80211 will still start the beacon loss timer and if a few beacons are lost, e.g. due to scanning, drop the connection. If the hardware doesn't advertise connection monitoring, then it won't drop the connection right away but probe the AP, which is intended, but due to the logic in the timer when connection monitoring is done it assumes the connection was actually lost. Fix this problem by not starting the timer when the HW does connection monitoring. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 725258c20746..81b2269e8f3a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -146,6 +146,9 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) return; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + mod_timer(&sdata->u.mgd.bcn_mon_timer, round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout)); } -- cgit v1.2.1 From 8c7d857c4a4a552d8d3e1b2e24e1864ec2989285 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 25 Jul 2012 01:42:36 +0300 Subject: mac80211: don't call mgd_prepare_tx when associated This doesn't make any sense since we are expected to be on the medium or at least to Tx only when we are on the right channel and the AP/GO can hear us. Move the call to mgd_prepare_tx() for deauth to be only done in case we're sending a deauth while not associated. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 81b2269e8f3a..f0d6fa283071 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -610,8 +610,6 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - drv_mgd_prepare_tx(local, sdata); - ieee80211_tx_skb(sdata, skb); } } @@ -3504,14 +3502,17 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code); if (ifmgd->associated && - ether_addr_equal(ifmgd->associated->bssid, req->bssid)) + ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, true, frame_buf); - else + } else { + drv_mgd_prepare_tx(sdata->local, sdata); ieee80211_send_deauth_disassoc(sdata, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, true, frame_buf); + } + mutex_unlock(&ifmgd->mtx); __cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); -- cgit v1.2.1 From e21768928d73df55e648869d3ae159475d1e4b7d Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 25 Jul 2012 13:56:53 +0300 Subject: cfg80211: unify IE search Remove ah-hoc IE search code found in the ieee80211_bss_get_ie() and use cfg80211_find_ie() instead. Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- net/wireless/util.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 26f8cd30f712..ce393dd8c928 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -684,22 +684,10 @@ EXPORT_SYMBOL(cfg80211_classify8021d); const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie) { - u8 *end, *pos; - - pos = bss->information_elements; - if (pos == NULL) + if (bss->information_elements == NULL) return NULL; - end = pos + bss->len_information_elements; - - while (pos + 1 < end) { - if (pos + 2 + pos[1] > end) - break; - if (pos[0] == ie) - return pos; - pos += 2 + pos[1]; - } - - return NULL; + return cfg80211_find_ie(ie, bss->information_elements, + bss->len_information_elements); } EXPORT_SYMBOL(ieee80211_bss_get_ie); -- cgit v1.2.1 From ab09587740fddf6b4116be7b6716ab47f34d2634 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Fri, 27 Jul 2012 12:33:22 +0300 Subject: mac80211: add PS flag to bss_conf Currently, ps mode is indicated per device (rather than per interface), which doesn't make a lot of sense. Moreover, there are subtle bugs caused by the inability to indicate ps change along with other changes (e.g. when the AP deauth us, we'd like to indicate CHANGED_PS | CHANGED_ASSOC, as changing PS before notifying about disassociation will result in null-packets being sent (if IEEE80211_HW_SUPPORTS_DYNAMIC_PS) while the sta is already disconnected.) Keep the current per-device notifications, and add parallel per-vif notifications. In order to keep it simple, the per-device ps and the per-vif ps are orthogonal - the per-vif ps configuration is determined only by the user configuration (enable/disable) and the connection state, and is not affected by other vifs state and (temporary) dynamic_ps/offchannel operations (unlike per-device ps). Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 ++++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 15 +++++++++++++++ net/mac80211/util.c | 3 ++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d41974aacf51..06b8d39780e9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1285,9 +1285,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, mutex_unlock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION && - params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) + params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local, -1); - + ieee80211_recalc_ps_vif(sdata); + } return 0; } @@ -2079,6 +2080,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps_vif(sdata); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e2f03b1b50e..8e65ad9c870a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1203,6 +1203,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); +void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f0d6fa283071..ea46c6446450 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1032,6 +1032,16 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) ieee80211_change_ps(local); } +void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata) +{ + bool ps_allowed = ieee80211_powersave_allowed(sdata); + + if (sdata->vif.bss_conf.ps != ps_allowed) { + sdata->vif.bss_conf.ps = ps_allowed; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_PS); + } +} + void ieee80211_dynamic_ps_disable_work(struct work_struct *work) { struct ieee80211_local *local = @@ -1335,6 +1345,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_smps(local); mutex_unlock(&local->iflist_mtx); + ieee80211_recalc_ps_vif(sdata); + netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); } @@ -1396,6 +1408,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, } local->ps_sdata = NULL; + /* disable per-vif ps */ + ieee80211_recalc_ps_vif(sdata); + /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ if (tx) drv_flush(local, false); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 39b82fee4904..037d148e9f19 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1359,7 +1359,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | - BSS_CHANGED_ARP_FILTER; + BSS_CHANGED_ARP_FILTER | + BSS_CHANGED_PS; mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); -- cgit v1.2.1 From 36323f817af0376c78612cfdab714b0feb05fea5 Mon Sep 17 00:00:00 2001 From: Thomas Huehn Date: Mon, 23 Jul 2012 21:33:42 +0200 Subject: mac80211: move TX station pointer and restructure TX Remove the control.sta pointer from ieee80211_tx_info to free up sufficient space in the TX skb control buffer for the upcoming Transmit Power Control (TPC). Instead, the pointer is now on the stack in a new control struct that is passed as a function parameter to the drivers' tx method. Signed-off-by: Thomas Huehn Signed-off-by: Alina Friedrichsen Signed-off-by: Felix Fietkau [reworded commit message] Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 6 ++++-- net/mac80211/tx.c | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index df9203199102..a81117a83996 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -22,9 +22,11 @@ get_bss_sdata(struct ieee80211_sub_if_data *sdata) return sdata; } -static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) +static inline void drv_tx(struct ieee80211_local *local, + struct ieee80211_tx_control *control, + struct sk_buff *skb) { - local->ops->tx(&local->hw, skb); + local->ops->tx(&local->hw, control, skb); } static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index acf712ffb5e6..7558ba58ea23 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1204,6 +1204,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, struct sk_buff_head *skbs, bool txpending) { + struct ieee80211_tx_control control; struct sk_buff *skb, *tmp; unsigned long flags; @@ -1240,10 +1241,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - info->control.sta = sta; + control.sta = sta; __skb_unlink(skb, skbs); - drv_tx(local, skb); + drv_tx(local, &control, skb); } return true; -- cgit v1.2.1 From 1b49de26566e7175e8f2d0934db6d9119f553b56 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Jul 2012 10:29:14 +0200 Subject: mac80211: supress HT/VHT disable if not supported If HT/VHT isn't supported by us we shouldn't print a message that we disabled it, do that only if the AP didn't support WMM and we therefore disable it. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ea46c6446450..c8465478b4b0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3373,16 +3373,18 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (!sband->ht_cap.ht_supported || local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; - netdev_info(sdata->dev, - "disabling HT as WMM/QoS is not supported\n"); + if (!bss->wmm_used) + netdev_info(sdata->dev, + "disabling HT as WMM/QoS is not supported by the AP\n"); } /* disable VHT if we don't support it or the AP doesn't use WMM */ if (!sband->vht_cap.vht_supported || local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - netdev_info(sdata->dev, - "disabling VHT as WMM/QoS is not supported\n"); + if (!bss->wmm_used) + netdev_info(sdata->dev, + "disabling VHT as WMM/QoS is not supported by the AP\n"); } memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); -- cgit v1.2.1 From 13e0c8e355983cdd4ea7accc3b3208e80944716d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Jul 2012 10:43:16 +0200 Subject: mac80211: rename sta to new_sta In ieee80211_prep_connection(), the station (if not NULL) is the new station (representing the AP) that needs to be added. Rename the variable to "new_sta" to clarify this. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c8465478b4b0..e065ef5f7b2f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3044,7 +3044,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; - struct sta_info *sta = NULL; + struct sta_info *new_sta = NULL; bool have_sta = false; int err; int ht_cfreq; @@ -3063,8 +3063,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (!have_sta) { - sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); - if (!sta) + new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); + if (!new_sta) return -ENOMEM; } @@ -3135,7 +3135,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, local->oper_channel = cbss->channel; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if (sta) { + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; @@ -3160,7 +3160,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, basic_rates = BIT(min_rate_index); } - sta->sta.supp_rates[cbss->channel->band] = rates; + new_sta->sta.supp_rates[cbss->channel->band] = rates; sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ @@ -3183,10 +3183,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BEACON_INT); if (assoc) - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(new_sta, IEEE80211_STA_AUTH); - err = sta_info_insert(sta); - sta = NULL; + err = sta_info_insert(new_sta); + new_sta = NULL; if (err) { sdata_info(sdata, "failed to insert STA entry for the AP (error %d)\n", -- cgit v1.2.1 From b17166a707e748ad87907f38431a1df26bb643f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Jul 2012 11:41:27 +0200 Subject: mac80211: set channel only once during auth/assoc There's no need to set up the channel during auth and again during assoc, just do it once. Currently this doesn't result in any changes since calling hw_config() with an unchanged channel will return early, but with the channel context work this has an impact on channel context assignment. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 69 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e065ef5f7b2f..682055207588 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3038,41 +3038,17 @@ int ieee80211_max_network_latency(struct notifier_block *nb, return 0; } -static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, - struct cfg80211_bss *cbss, bool assoc) +static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *cbss) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_bss *bss = (void *)cbss->priv; - struct sta_info *new_sta = NULL; - bool have_sta = false; - int err; int ht_cfreq; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; const u8 *ht_oper_ie; const struct ieee80211_ht_operation *ht_oper = NULL; struct ieee80211_supported_band *sband; - if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) - return -EINVAL; - - if (assoc) { - rcu_read_lock(); - have_sta = sta_info_get(sdata, cbss->bssid); - rcu_read_unlock(); - } - - if (!have_sta) { - new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); - if (!new_sta) - return -ENOMEM; - } - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - - /* switch to the right channel */ sband = local->hw.wiphy->bands[cbss->channel->band]; ifmgd->flags &= ~IEEE80211_STA_DISABLE_40MHZ; @@ -3135,10 +3111,51 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, local->oper_channel = cbss->channel; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + return 0; +} + +static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *cbss, bool assoc) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_bss *bss = (void *)cbss->priv; + struct sta_info *new_sta = NULL; + bool have_sta = false; + int err; + + if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) + return -EINVAL; + + if (assoc) { + rcu_read_lock(); + have_sta = sta_info_get(sdata, cbss->bssid); + rcu_read_unlock(); + } + + if (!have_sta) { + new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); + if (!new_sta) + return -ENOMEM; + } + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[cbss->channel->band]; + + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + sta_info_free(local, new_sta); + return err; + } ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len, -- cgit v1.2.1 From 6962d602056c88ce470f991a265a33132fb95232 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:29:21 +0200 Subject: mac80211: use oper_channel in mesh Using hw.conf.channel is wrong as it could be the temporary channel if any function like the beacon get function is called while scanning or during other temporary out-of-channel activities. Use oper_channel instead. Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6fac18c0423f..03f1696d7d98 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -349,17 +349,18 @@ int mesh_add_ds_params_ie(struct sk_buff *skb, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan = local->oper_channel; u8 *pos; if (skb_tailroom(skb) < 3) return -ENOMEM; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[chan->band]; if (sband->band == IEEE80211_BAND_2GHZ) { pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); + *pos++ = ieee80211_frequency_to_channel(chan->center_freq); } return 0; @@ -603,7 +604,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, - sdata->local->hw.conf.channel->band); + sdata->local->oper_channel->band); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_HT | -- cgit v1.2.1 From 273686d664daae1aa728b76e45720273b26dd876 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:29:21 +0200 Subject: mac80211: use oper_channel in ibss Using hw.conf.channel is wrong as it could be the temporary channel if any function like the beacon get function is called while scanning or during other temporary out-of-channel activities. Use oper_channel instead. Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5746d62faba1..d06208518eae 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -205,7 +205,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - bss = cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel, + bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, mgmt, skb->len, 0, GFP_KERNEL); cfg80211_put_bss(bss); netif_carrier_on(sdata->dev); @@ -294,7 +294,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - int band = local->hw.conf.channel->band; + int band = local->oper_channel->band; /* * XXX: Consider removing the least recently used entry and @@ -561,7 +561,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - int band = local->hw.conf.channel->band; + int band = local->oper_channel->band; /* * XXX: Consider removing the least recently used entry and @@ -759,7 +759,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; } sdata_info(sdata, "IBSS not allowed on %d MHz\n", - local->hw.conf.channel->center_freq); + local->oper_channel->center_freq); /* No IBSS found - decrease scan interval and continue * scanning. */ -- cgit v1.2.1 From 568d6e289736c9c78cd8723aa81415daffafeff9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:29:21 +0200 Subject: mac80211: use oper_channel in managed mlme Using hw.conf.channel is wrong as it could be the temporary channel if any function like the beacon get function is called while scanning or during other temporary out-of-channel activities. Use oper_channel instead. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 682055207588..2657a5645a8a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -185,15 +185,15 @@ static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, u16 ht_opmode; bool disable_40 = false; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; switch (sdata->vif.bss_conf.channel_type) { case NL80211_CHAN_HT40PLUS: - if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40PLUS) + if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40PLUS) disable_40 = true; break; case NL80211_CHAN_HT40MINUS: - if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40MINUS) + if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40MINUS) disable_40 = true; break; default: @@ -1274,7 +1274,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) + if (sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -2364,7 +2364,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - if (rx_status->freq != local->hw.conf.channel->center_freq) + if (rx_status->freq != local->oper_channel->center_freq) return; if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon && @@ -2528,7 +2528,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) { struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, bssid, true); -- cgit v1.2.1 From 679ef4eadde1f8e55074427c0d8de2da55ca81f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:29:21 +0200 Subject: mac80211: use oper_channel in utils and config Using hw.conf.channel is wrong as it could be the temporary channel if any function like the beacon get function is called while scanning or during other temporary out-of-channel activities. Use oper_channel instead. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 +++--- net/mac80211/iface.c | 2 +- net/mac80211/util.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 06b8d39780e9..1b8d19112943 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -330,7 +330,7 @@ static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, in if (!(rate->flags & RATE_INFO_FLAGS_MCS)) { struct ieee80211_supported_band *sband; sband = sta->local->hw.wiphy->bands[ - sta->local->hw.conf.channel->band]; + sta->local->oper_channel->band]; rate->legacy = sband->bitrates[idx].bitrate; } else rate->mcs = idx; @@ -1662,7 +1662,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) { + sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -1928,7 +1928,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_channel *chan = local->hw.conf.channel; + struct ieee80211_channel *chan = local->oper_channel; u32 changes = 0; switch (type) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bfb57dcc1538..fc8ba83e2c33 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1274,7 +1274,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, - sdata->local->hw.conf.channel->band); + sdata->local->oper_channel->band); sdata->drop_unencrypted = 0; if (type == NL80211_IFTYPE_STATION) sdata->u.mgd.use_4addr = false; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 037d148e9f19..39005eca1a59 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -832,7 +832,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, memset(&qparam, 0, sizeof(qparam)); - use_11b = (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) && + use_11b = (local->oper_channel->band == IEEE80211_BAND_2GHZ) && !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); /* @@ -919,7 +919,7 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, if ((supp_rates[i] & 0x7f) * 5 > 110) have_higher_than_11mbit = 1; - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + if (local->oper_channel->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else -- cgit v1.2.1 From 6b77863b719a4e32909c218c0d5a83a14f4d98c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:53:27 +0200 Subject: mac80211: fix current vs. operating channel in preq/beacon When sending probe requests, e.g. during software scanning, these will go out on the *current* channel, so their IEs need to be built from the current channel. At other times, e.g. for beacons or probe request templates, the IEs will be used on the *operating* channel and using the current channel instead might result in errors. Add the appropriate parameters to respect the difference. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 20 ++++++++++++++------ net/mac80211/ieee80211_i.h | 7 +++++-- net/mac80211/mesh_plink.c | 6 ++++-- net/mac80211/mlme.c | 4 +++- net/mac80211/tx.c | 6 +++--- net/mac80211/util.c | 27 +++++++++++++++------------ 6 files changed, 44 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1b8d19112943..5583f5b73dc9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2655,6 +2655,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_tdls_data *tf; tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); @@ -2674,8 +2675,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_req.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_RESPONSE: @@ -2688,8 +2691,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_CONFIRM: @@ -2727,6 +2732,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; mgmt = (void *)skb_put(skb, 24); @@ -2749,8 +2755,10 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt->u.action.u.tdls_discover_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8e65ad9c870a..0aaad023cdbe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1459,6 +1459,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, u8 channel); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, + struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, bool directed); @@ -1489,9 +1490,11 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic); + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band); int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic); + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band); /* channel management */ enum ieee80211_chan_mode { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fa642c794719..985b37f7455d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -258,8 +258,10 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - if (ieee80211_add_srates_ie(sdata, skb, true) || - ieee80211_add_ext_srates_ie(sdata, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true, + local->oper_channel->band) || + ieee80211_add_ext_srates_ie(sdata, skb, true, + local->oper_channel->band) || mesh_add_rsn_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2657a5645a8a..c416a08d90f1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1683,7 +1683,9 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, ssid_len = ssid[1]; skb = ieee80211_build_probe_req(sdata, cbss->bssid, - (u32) -1, ssid + 2, ssid_len, + (u32) -1, + sdata->local->oper_channel, + ssid + 2, ssid_len, NULL, 0, true); return skb; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7558ba58ea23..b559c6bd8681 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2303,7 +2303,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_if_ap *ap = NULL; struct beacon_data *beacon; struct ieee80211_supported_band *sband; - enum ieee80211_band band = local->hw.conf.channel->band; + enum ieee80211_band band = local->oper_channel->band; struct ieee80211_tx_rate_control txrc; sband = local->hw.wiphy->bands[band]; @@ -2429,9 +2429,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - if (ieee80211_add_srates_ie(sdata, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true, band) || mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(sdata, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true, band) || mesh_add_rsn_ie(skb, sdata) || mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata) || diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 39005eca1a59..99e4258bdb26 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1100,6 +1100,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, + struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, bool directed) @@ -1109,7 +1110,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; size_t buf_len; u8 *buf; - u8 chan; + u8 chan_no; /* FIXME: come up with a proper value */ buf = kmalloc(200 + ie_len, GFP_KERNEL); @@ -1122,14 +1123,12 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, * badly-behaved APs don't respond when this parameter is included. */ if (directed) - chan = 0; + chan_no = 0; else - chan = ieee80211_frequency_to_channel( - local->hw.conf.channel->center_freq); + chan_no = ieee80211_frequency_to_channel(chan->center_freq); - buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, - local->hw.conf.channel->band, - ratemask, chan); + buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, chan->band, + ratemask, chan_no); skb = ieee80211_probereq_get(&local->hw, &sdata->vif, ssid, ssid_len, @@ -1158,7 +1157,9 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, { struct sk_buff *skb; - skb = ieee80211_build_probe_req(sdata, dst, ratemask, ssid, ssid_len, + skb = ieee80211_build_probe_req(sdata, dst, ratemask, + sdata->local->hw.conf.channel, + ssid, ssid_len, ie, ie_len, directed); if (skb) { if (no_cck) @@ -1810,7 +1811,8 @@ ieee80211_ht_oper_to_channel_type(struct ieee80211_ht_operation *ht_oper) } int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic) + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -1818,7 +1820,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, u8 i, rates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; rates = sband->n_bitrates; if (rates > 8) rates = 8; @@ -1841,7 +1843,8 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, } int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic) + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -1849,7 +1852,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; exrates = sband->n_bitrates; if (exrates > 8) exrates -= 8; -- cgit v1.2.1 From 2d56577bc68e56097a1cd6599b678e8cab758e64 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 15:12:51 +0200 Subject: mac80211: use correct channel in TX Since we only need the band, remove the channel pointer from struct ieee80211_tx_data and also assign it properly, depending on context, to the correct operating or current channel. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/tx.c | 17 +++++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0aaad023cdbe..d1a7c58a8c62 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -193,8 +193,6 @@ struct ieee80211_tx_data { struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_channel *channel; - unsigned int flags; }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b559c6bd8681..5b81660a35b3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -55,7 +55,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) return 0; - sband = local->hw.wiphy->bands[tx->channel->band]; + sband = local->hw.wiphy->bands[info->band]; txrate = &sband->bitrates[info->control.rates[0].idx]; erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -615,7 +615,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) memset(&txrc, 0, sizeof(txrc)); - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + sband = tx->local->hw.wiphy->bands[info->band]; len = min_t(u32, tx->skb->len + FCS_LEN, tx->local->hw.wiphy->frag_threshold); @@ -626,13 +626,13 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band]; + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; memcpy(txrc.rate_idx_mcs_mask, - tx->sdata->rc_rateidx_mcs_mask[tx->channel->band], + tx->sdata->rc_rateidx_mcs_mask[info->band], sizeof(txrc.rate_idx_mcs_mask)); txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || @@ -667,7 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) "scanning and associated. Target station: " "%pM on %d GHz band\n", tx->sdata->name, hdr->addr1, - tx->channel->band ? 5 : 2)) + info->band ? 5 : 2)) return TX_DROP; /* @@ -1131,7 +1131,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->skb = skb; tx->local = local; tx->sdata = sdata; - tx->channel = local->hw.conf.channel; __skb_queue_head_init(&tx->skbs); /* @@ -1400,8 +1399,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, goto out; } - tx.channel = local->hw.conf.channel; - info->band = tx.channel->band; + info->band = local->hw.conf.channel->band; /* set up hw_queue value early */ if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || @@ -2710,8 +2708,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); tx.flags |= IEEE80211_TX_PS_BUFFERED; - tx.channel = local->hw.conf.channel; - info->band = tx.channel->band; + info->band = local->oper_channel->band; if (invoke_tx_handlers(&tx)) skb = NULL; -- cgit v1.2.1 From c405c6298eacd423098afacf6020ddbda1b0378b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Jul 2012 19:44:12 +0200 Subject: mac80211: manage carrier state in mesh Instead of assuming the carrier is on all the time in mesh manage it with joining and leaving the mesh. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 12 ++++++++---- net/mac80211/mesh.c | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fc8ba83e2c33..c65a03ba809f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -539,12 +539,16 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_AP) + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: netif_carrier_off(dev); - else + break; + default: netif_carrier_on(dev); + } /* * set default queue parameters so drivers don't diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 03f1696d7d98..571d5183060e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -610,6 +610,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT); + + netif_carrier_on(sdata->dev); } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) @@ -617,6 +619,8 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + netif_carrier_off(sdata->dev); + ifmsh->mesh_id_len = 0; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); sta_info_flush(local, NULL); -- cgit v1.2.1 From 1411af156524ce42c2a7f989320c4484257f3ff5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Jul 2012 19:48:09 +0200 Subject: mac80211: enable WDS carrier only after adding station Enable the carrier on WDS type interfaces only after having added the station entry for the WDS peer so outgoing frames will find it. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c65a03ba809f..2d6ac78971ea 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -546,6 +546,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) case NL80211_IFTYPE_MESH_POINT: netif_carrier_off(dev); break; + case NL80211_IFTYPE_WDS: + break; default: netif_carrier_on(dev); } @@ -580,6 +582,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) } rate_control_rate_init(sta); + netif_carrier_on(dev); } /* -- cgit v1.2.1 From e83e6541cee0a12bc445b0f4fad5214df5803087 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Jul 2012 16:23:07 +0200 Subject: mac80211: use eth_broadcast_addr Instead of memset(). Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/ibss.c | 2 +- net/mac80211/mesh_pathtbl.c | 2 +- net/mac80211/tx.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5583f5b73dc9..df64b455821d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -950,7 +950,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ - memset(msg->da, 0xff, ETH_ALEN); + eth_broadcast_addr(msg->da); memcpy(msg->sa, sta->sta.addr, ETH_ALEN); msg->len = htons(6); msg->dsap = 0; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d06208518eae..1ebda2f9e57b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -109,7 +109,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); - memset(mgmt->da, 0xff, ETH_ALEN); + eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 075bc535c601..bec7b281b5ba 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -531,7 +531,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) read_lock_bh(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); - memset(new_mpath->rann_snd_addr, 0xff, ETH_ALEN); + eth_broadcast_addr(new_mpath->rann_snd_addr); new_mpath->is_root = false; new_mpath->sdata = sdata; new_mpath->flags = 0; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5b81660a35b3..7dbcf293708b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2415,7 +2415,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - memset(mgmt->da, 0xff, ETH_ALEN); + eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.beacon.beacon_int = @@ -2609,9 +2609,9 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, memset(hdr, 0, sizeof(*hdr)); hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); - memset(hdr->addr1, 0xff, ETH_ALEN); + eth_broadcast_addr(hdr->addr1); memcpy(hdr->addr2, vif->addr, ETH_ALEN); - memset(hdr->addr3, 0xff, ETH_ALEN); + eth_broadcast_addr(hdr->addr3); pos = skb_put(skb, ie_ssid_len); *pos++ = WLAN_EID_SSID; -- cgit v1.2.1 From 19c3b8303d4686aa373c669ee833609b3fb403cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 20:13:36 +0200 Subject: mac80211: reset station MLME flags upon new association When associating anew, the old station MLME flags should be cleared. The only exception is the 40 MHz disable flag as it might have been set while the channel was set in a previous authentication attempt so it needs to be kept intact. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c416a08d90f1..9d60b4993635 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3357,10 +3357,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } /* prepare assoc data */ - - ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; - ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; - ifmgd->flags &= ~IEEE80211_STA_DISABLE_VHT; + + /* + * keep only the 40 MHz disable bit set as it might have + * been set during authentication already, all other bits + * should be reset for a new connection + */ + ifmgd->flags &= IEEE80211_STA_DISABLE_40MHZ; ifmgd->beacon_crc_valid = false; -- cgit v1.2.1 From 0d466b9c6798d431141ab15ae6d5ea413b4d09b2 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 3 Aug 2012 12:21:32 -0700 Subject: mac80211: improve cleanup when leaving mesh It is not necessary to stop the mesh beacon in the mac80211 ndo_stop handler, since cfg80211 has already left the mesh on NETDEV_GOING_DOWN notification. Also some improvements to ieee80211_stop_mesh(): - flush mpath entries. - flush sta entries per-sdata so we don't remove entries belonging to other vifs on the same hw. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 8 -------- net/mac80211/mesh.c | 6 +++++- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2d6ac78971ea..5a81577879ed 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -777,14 +777,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); - /* - * Disable beaconing here for mesh only, AP and IBSS - * are already taken care of. - */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); - /* * Free all remaining keys, there shouldn't be any, * except maybe group keys in AP more or WDS? diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 571d5183060e..035cd0c8ce33 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -621,9 +621,13 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) netif_carrier_off(sdata->dev); + /* stop the beacon */ ifmsh->mesh_id_len = 0; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - sta_info_flush(local, NULL); + + /* flush STAs and mpaths on this iface */ + sta_info_flush(sdata->local, sdata); + mesh_path_flush_by_iface(sdata); del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); -- cgit v1.2.1 From 3e17f2be31f354fe03e1732bc527a31ff3dd3bb9 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 3 Aug 2012 12:21:33 -0700 Subject: mac80211: remove ieee80211_clean_sdata() This function was only used by mesh, and not really needed since any interface-specific cleanup already happens in the netdev handlers. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5a81577879ed..abee3a0c25ed 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1155,18 +1155,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } -static void ieee80211_clean_sdata(struct ieee80211_sub_if_data *sdata) -{ - switch (sdata->vif.type) { - case NL80211_IFTYPE_MESH_POINT: - mesh_path_flush_by_iface(sdata); - break; - - default: - break; - } -} - static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { @@ -1502,9 +1490,6 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); - /* clean up type-dependent data */ - ieee80211_clean_sdata(sdata); - synchronize_rcu(); unregister_netdevice(sdata->dev); } @@ -1524,8 +1509,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); - ieee80211_clean_sdata(sdata); - unregister_netdevice_queue(sdata->dev, &unreg_list); } mutex_unlock(&local->iflist_mtx); -- cgit v1.2.1 From e7570dfb635b0c89570852002c9f85dd1cf82ba1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 3 Aug 2012 12:21:34 -0700 Subject: mac80211: don't request ack for peering close It doesn't make a lot of sense to wait for an ack in response to a peering close frame since either peer in this exchange could be going down. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 985b37f7455d..bad5126c8483 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -217,6 +217,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; + struct ieee80211_tx_info *info; struct ieee80211_mgmt *mgmt; bool include_plid = false; u16 peering_proto = 0; @@ -238,6 +239,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.ie_len); if (!skb) return -1; + info = IEEE80211_SKB_CB(skb); skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); @@ -267,6 +269,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mesh_add_meshconf_ie(skb, sdata)) return -1; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ + info->flags |= IEEE80211_TX_CTL_NO_ACK; if (mesh_add_meshid_ie(skb, sdata)) return -1; } -- cgit v1.2.1 From f609a43dca2964a8a604ef554be92fa11c3b4c41 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 3 Aug 2012 12:21:35 -0700 Subject: mac80211: skb leak in mesh_plink_frame_tx() Although adding an IE is almost guaranteed to succeed since we already accounted for its length while allocating the skb, we should still free the skb in case of failure. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index bad5126c8483..5fd1250f7866 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -224,6 +224,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *pos, ie_len = 4; int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + sizeof(mgmt->u.action.u.self_prot); + int err = -ENOMEM; skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -267,11 +268,11 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mesh_add_rsn_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata)) - return -1; + goto free; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ info->flags |= IEEE80211_TX_CTL_NO_ACK; if (mesh_add_meshid_ie(skb, sdata)) - return -1; + goto free; } /* Add Mesh Peering Management element */ @@ -290,11 +291,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, ie_len += 2; /* reason code */ break; default: - return -EINVAL; + err = -EINVAL; + goto free; } if (WARN_ON(skb_tailroom(skb) < 2 + ie_len)) - return -ENOMEM; + goto free; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PEER_MGMT; @@ -315,14 +317,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action != WLAN_SP_MESH_PEERING_CLOSE) { if (mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata)) - return -1; + goto free; } if (mesh_add_vendor_ies(skb, sdata)) - return -1; + goto free; ieee80211_tx_skb(sdata, skb); return 0; +free: + kfree_skb(skb); + return err; } /** -- cgit v1.2.1 From 40384999d142552b81aeb596c2ae663dd3ff60ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Aug 2012 21:06:50 +0000 Subject: ipv4: change inet_addr_hash() Use net_hash_mix(net) instead of hash_ptr(net, 8), and use hash_32() instead of using a serie of XOR Define IN4_ADDR_HSIZE_SHIFT for clarity __ip_dev_find() can perform the net_eq() call only if ifa_local matches the key, to avoid unneeded dereferences. remove inline attributes # size net/ipv4/devinet.o.before net/ipv4/devinet.o text data bss dec hex filename 17471 2545 2048 22064 5630 net/ipv4/devinet.o.before 17335 2545 2048 21928 55a8 net/ipv4/devinet.o Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef7..adf273f8ad2e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; -/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE - * value. So if you change this define, make appropriate changes to - * inet_addr_hash as well. - */ -#define IN4_ADDR_HSIZE 256 +#define IN4_ADDR_HSIZE_SHIFT 8 +#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) + static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; static DEFINE_SPINLOCK(inet_addr_hash_lock); -static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ hash_ptr(net, 8); + u32 val = (__force u32) addr ^ net_hash_mix(net); - return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & - (IN4_ADDR_HSIZE - 1)); + return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { - unsigned int hash = inet_addr_hash(net, ifa->ifa_local); + u32 hash = inet_addr_hash(net, ifa->ifa_local); spin_lock(&inet_addr_hash_lock); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); @@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - unsigned int hash = inet_addr_hash(net, addr); + u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; struct hlist_node *node; rcu_read_lock(); hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; if (ifa->ifa_local == addr) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; result = dev; break; } @@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, static void devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct in_device *idev); #else -static inline void devinet_sysctl_register(struct in_device *idev) +static void devinet_sysctl_register(struct in_device *idev) { } -static inline void devinet_sysctl_unregister(struct in_device *idev) +static void devinet_sysctl_unregister(struct in_device *idev) { } #endif @@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head) kfree(ifa); } -static inline void inet_free_ifa(struct in_ifaddr *ifa) +static void inet_free_ifa(struct in_ifaddr *ifa) { call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } @@ -659,7 +656,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg * Determine a default network mask, based on the IP address. */ -static inline int inet_abc_len(__be32 addr) +static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. */ @@ -1124,7 +1121,7 @@ skip: } } -static inline bool inetdev_valid_mtu(unsigned int mtu) +static bool inetdev_valid_mtu(unsigned int mtu) { return mtu >= 68; } @@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = { .notifier_call = inetdev_event, }; -static inline size_t inet_nlmsg_size(void) +static size_t inet_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(4) /* IFA_ADDRESS */ -- cgit v1.2.1 From 9eb43e765368f835d92c93844ebce30da7efeb84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Aug 2012 21:27:25 +0000 Subject: ipv4: Introduce IN_DEV_NET_ROUTE_LOCALNET performance profiles show a high cost in the IN_DEV_ROUTE_LOCALNET() call done in ip_route_input_slow(), because of multiple dereferences, even if cache lines are clean and available in cpu caches. Since we already have the 'net' pointer, introduce IN_DEV_NET_ROUTE_LOCALNET() macro avoiding two dereferences (dev_net(in_dev->dev)) Also change the tests to use IN_DEV_NET_ROUTE_LOCALNET() only if saddr or/and daddr are loopback addresse. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ba974f143c..21ad369014c0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1587,11 +1587,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(daddr)) goto martian_destination; - if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { - if (ipv4_is_loopback(daddr)) + /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), + * and call it once if daddr or/and saddr are loopback addresses + */ + if (ipv4_is_loopback(daddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_destination; - - if (ipv4_is_loopback(saddr)) + } else if (ipv4_is_loopback(saddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_source; } -- cgit v1.2.1 From bb4b2a9ae38ef3bac69627f35e4f916752631fd1 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:40 +0300 Subject: Bluetooth: mgmt: Managing only BR/EDR HCI controllers Add check that HCI controller is BR/EDR. AMP controller shall not be managed by mgmt interface and consequently user space. Signed-off-by: Andrei Emeltchenko Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 6 ++++-- net/bluetooth/mgmt.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5a..fa974a19d365 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -696,7 +696,8 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); hci_dev_unlock(hdev); @@ -797,7 +798,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 0); hci_dev_unlock(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d17ca6..2a0f695e33d4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -193,6 +193,11 @@ static u8 mgmt_status_table[] = { MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ }; +bool mgmt_valid_hdev(struct hci_dev *hdev) +{ + return hdev->dev_type == HCI_BREDR; +} + static u8 mgmt_status(u8 hci_status) { if (hci_status < ARRAY_SIZE(mgmt_status_table)) @@ -317,7 +322,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_index_list *rp; - struct list_head *p; struct hci_dev *d; size_t rp_len; u16 count; @@ -328,7 +332,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, read_lock(&hci_dev_list_lock); count = 0; - list_for_each(p, &hci_dev_list) { + list_for_each_entry(d, &hci_dev_list, list) { + if (!mgmt_valid_hdev(d)) + continue; + count++; } @@ -346,6 +353,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_SETUP, &d->dev_flags)) continue; + if (!mgmt_valid_hdev(d)) + continue; + rp->index[i++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -2820,6 +2830,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data) int mgmt_index_added(struct hci_dev *hdev) { + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); } @@ -2827,6 +2840,9 @@ int mgmt_index_removed(struct hci_dev *hdev) { u8 status = MGMT_STATUS_INVALID_INDEX; + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); -- cgit v1.2.1 From 8e8c7e36fb216d2d072116de3bec6130627ad691 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:41 +0300 Subject: Bluetooth: debug: Fix printing A2MP cmd code format Print A2MP code format according to Bluetooth style. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/a2mp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 4ff0bf3ba9a5..57094c136230 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -316,7 +316,7 @@ send_rsp: static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, struct a2mp_cmd *hdr) { - BT_DBG("ident %d code %d", hdr->ident, hdr->code); + BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code); skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -335,7 +335,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) struct a2mp_cmd *hdr = (void *) skb->data; u16 len = le16_to_cpu(hdr->len); - BT_DBG("code 0x%02x id %d len %d", hdr->code, hdr->ident, len); + BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len); skb_pull(skb, sizeof(*hdr)); -- cgit v1.2.1 From d9fc1d54f6f8123909cdee4fa98ab1ebf6c8651c Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:46 +0300 Subject: Bluetooth: Do not shadow hdr variable Fix compile warnings below: ... net/bluetooth/a2mp.c:505:33: warning: symbol 'hdr' shadows an earlier one net/bluetooth/a2mp.c:498:25: originally declared here ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/a2mp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 57094c136230..79af661a58dd 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -325,15 +325,17 @@ static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, /* Handle A2MP signalling */ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct a2mp_cmd *hdr = (void *) skb->data; + struct a2mp_cmd *hdr; struct amp_mgr *mgr = chan->data; int err = 0; amp_mgr_get(mgr); while (skb->len >= sizeof(*hdr)) { - struct a2mp_cmd *hdr = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); + u16 len; + + hdr = (void *) skb->data; + len = le16_to_cpu(hdr->len); BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len); @@ -393,7 +395,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) if (err) { struct a2mp_cmd_rej rej; + rej.reason = __constant_cpu_to_le16(0); + hdr = (void *) skb->data; BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err); -- cgit v1.2.1 From 78eb2f985c17eab34a0ef919b3263d6b08064e98 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:47 +0300 Subject: Bluetooth: Fix processing A2MP chan in security_cfm Do not process A2MP channel in l2cap_security_cfm Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db04bfb..64d88a603f0a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5404,6 +5404,11 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid, state_to_string(chan->state)); + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + l2cap_chan_unlock(chan); + continue; + } + if (chan->scid == L2CAP_CID_LE_DATA) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; -- cgit v1.2.1 From ed3fa31f35896b42c54333edabf0a9e986fa952c Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:46 -0300 Subject: Bluetooth: Use lmp_bredr_capable where applicable This patch replaces all LMP_NO_BREDR bit checking by the helper macro lmp_bredr_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2a0f695e33d4..990ec6affca5 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -383,7 +383,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (hdev->features[6] & LMP_SIMPLE_PAIR) settings |= MGMT_SETTING_SSP; - if (!(hdev->features[4] & LMP_NO_BREDR)) { + if (lmp_bredr_capable(hdev)) { settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; } @@ -413,7 +413,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_PAIRABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_PAIRABLE; - if (!(hdev->features[4] & LMP_NO_BREDR)) + if (lmp_bredr_capable(hdev)) settings |= MGMT_SETTING_BREDR; if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) -- cgit v1.2.1 From c383ddc481a1774702473b4bb0d2927aab3f2d5a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:47 -0300 Subject: Bluetooth: Use lmp_le_capable where applicable This patch replaces all LMP_LE bit checking by the helper macro lmp_le_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/mgmt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978a33f9..498d55edcbc3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -541,7 +541,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * Features Notification */ } - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); @@ -746,7 +746,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, break; } - if (test_bit(HCI_INIT, &hdev->flags) && hdev->features[4] & LMP_LE) + if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) hci_set_le_support(hdev); done: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 990ec6affca5..0351bf27f2bb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -391,7 +391,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (enable_hs) settings |= MGMT_SETTING_HS; - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) settings |= MGMT_SETTING_LE; return settings; @@ -1205,7 +1205,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[4] & LMP_LE)) { + if (!lmp_le_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_NOT_SUPPORTED); goto unlock; -- cgit v1.2.1 From 9a1a1996d54a92cae2affa1de689cb04ebe7bce1 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:48 -0300 Subject: Bluetooth: Use lmp_ssp_capable where applicable This patch replaces all LMP_SIMPLE_PAIR bit checking by the helper macro lmp_ssp_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- net/bluetooth/mgmt.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 498d55edcbc3..b120388678fd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -528,7 +528,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[7] & LMP_LSTO) events[6] |= 0x80; /* Link Supervision Timeout Changed */ - if (hdev->features[6] & LMP_SIMPLE_PAIR) { + if (lmp_ssp_capable(hdev)) { events[6] |= 0x01; /* IO Capability Request */ events[6] |= 0x02; /* IO Capability Response */ events[6] |= 0x04; /* User Confirmation Request */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0351bf27f2bb..a3329cbd3e4d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -380,7 +380,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_PAIRABLE; - if (hdev->features[6] & LMP_SIMPLE_PAIR) + if (lmp_ssp_capable(hdev)) settings |= MGMT_SETTING_SSP; if (lmp_bredr_capable(hdev)) { @@ -1121,7 +1121,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_NOT_SUPPORTED); goto failed; @@ -2201,7 +2201,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_NOT_SUPPORTED); goto unlock; -- cgit v1.2.1 From 45db810fb71a94926c10f4dbbb5ca7913983f83b Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:49 -0300 Subject: Bluetooth: Use lmp_esco_capable where applicable This patch replaces all LMP_ESCO bit checking by the helper macro lmp_esco_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b120388678fd..5722a388ada1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -686,7 +686,7 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, hdev->esco_type |= (ESCO_HV3); } - if (hdev->features[3] & LMP_ESCO) + if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); if (hdev->features[4] & LMP_EV4) -- cgit v1.2.1 From 9f92ebf6c72de444801ab4a922965bd1f90834ae Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:50 -0300 Subject: Bluetooth: Use lmp_rswitch_capable where applicable This patch replaces all LMP_RSWITCH bit checking by the helper macro lmp_rswitch_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5722a388ada1..5c71c85c87a7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -623,7 +623,7 @@ static void hci_setup_link_policy(struct hci_dev *hdev) struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; - if (hdev->features[0] & LMP_RSWITCH) + if (lmp_rswitch_capable(hdev)) link_policy |= HCI_LP_RSWITCH; if (hdev->features[0] & LMP_HOLD) link_policy |= HCI_LP_HOLD; -- cgit v1.2.1 From 6eded1004abb060fbdf69611abc560c717f2bb8b Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:51 -0300 Subject: Bluetooth: Use lmp_sniff_capable where applicable This patch replaces all LMP_SNIFF bit checking by the helper macro lmp_sniff_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5c71c85c87a7..eb9024a8684c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -627,7 +627,7 @@ static void hci_setup_link_policy(struct hci_dev *hdev) link_policy |= HCI_LP_RSWITCH; if (hdev->features[0] & LMP_HOLD) link_policy |= HCI_LP_HOLD; - if (hdev->features[0] & LMP_SNIFF) + if (lmp_sniff_capable(hdev)) link_policy |= HCI_LP_SNIFF; if (hdev->features[1] & LMP_PARK) link_policy |= HCI_LP_PARK; -- cgit v1.2.1 From 999dcd10a88243ab304966a506b4975ce5f1e3bb Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:52 -0300 Subject: Bluetooth: Use lmp_sniffsubr_capable where applicable This patch replaces all LMP_SNIFF_SUBR bit checking by the helper macro lmp_sniffsubr_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index eb9024a8684c..060780c40ba1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -513,7 +513,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x02; /* Inquiry Result with RSSI */ - if (hdev->features[5] & LMP_SNIFF_SUBR) + if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ if (hdev->features[5] & LMP_PAUSE_ENC) -- cgit v1.2.1 From c58e810eb0916f9197378435af72136fb7c97f43 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 24 Jul 2012 15:03:53 -0300 Subject: Bluetooth: Use lmp_no_flush_capable where applicable This patch replaces all LMP_NO_FLUSH bit checking by the helper macro lmp_no_flush_capable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 060780c40ba1..2d8761b8b19c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -522,7 +522,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[6] & LMP_EXT_INQ) events[5] |= 0x40; /* Extended Inquiry Result */ - if (hdev->features[6] & LMP_NO_FLUSH) + if (lmp_no_flush_capable(hdev)) events[7] |= 0x01; /* Enhanced Flush Complete */ if (hdev->features[7] & LMP_LSTO) -- cgit v1.2.1 From 9e66463127ff7238020c3c4e7f84dfbc23e5c2b5 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 24 Jul 2012 16:06:15 +0300 Subject: Bluetooth: Make connect / disconnect cfm functions return void Return values are never used because callers hci_proto_connect_cfm and hci_proto_disconn_cfm return void. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 6 ++---- net/bluetooth/sco.c | 7 ++----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 64d88a603f0a..8391e0575494 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5329,7 +5329,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return exact ? lm1 : lm2; } -int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn; @@ -5342,7 +5342,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) } else l2cap_conn_del(hcon, bt_to_errno(status)); - return 0; } int l2cap_disconn_ind(struct hci_conn *hcon) @@ -5356,12 +5355,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon) return conn->disc_reason; } -int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); - return 0; } static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25dcff7..0ef5a78a889f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -913,7 +913,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return lm; } -int sco_connect_cfm(struct hci_conn *hcon, __u8 status) +void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (!status) { @@ -924,16 +924,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status) sco_conn_ready(conn); } else sco_conn_del(hcon, bt_to_errno(status)); - - return 0; } -int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); - return 0; } int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) -- cgit v1.2.1 From ee72d150ada90d33cc6e222fbdd7f980c16d974d Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Mon, 23 Jul 2012 18:19:04 -0300 Subject: Bluetooth: Remove locking in hci_user_passkey_request_evt This patch removes hdev locking in hci_user_passkey_request_evt since it is not needed. mgmt_user_passkey_request simply calls mgmt_event which does not require hdev locking at all. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2d8761b8b19c..0386e1e72275 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3252,12 +3252,8 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, BT_DBG("%s", hdev->name); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); - - hci_dev_unlock(hdev); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, -- cgit v1.2.1 From 3064837289259843310b266a9422aca5f5b4b9c7 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Fri, 13 Jul 2012 18:17:54 +0530 Subject: Bluetooth: Move l2cap_chan_hold/put to l2cap_core.c Refactor the code in order to use the l2cap_chan_destroy() from l2cap_chan_put() under the refcnt protection. Signed-off-by: Jaganath Kanakkassery Signed-off-by: Syam Sidhardhan Reviewed-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8391e0575494..79923d8bbe97 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -425,6 +425,21 @@ void l2cap_chan_destroy(struct l2cap_chan *chan) l2cap_chan_put(chan); } +void l2cap_chan_hold(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + + atomic_inc(&c->refcnt); +} + +void l2cap_chan_put(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + + if (atomic_dec_and_test(&c->refcnt)) + kfree(c); +} + void l2cap_chan_set_defaults(struct l2cap_chan *chan) { chan->fcs = L2CAP_FCS_CRC16; -- cgit v1.2.1 From 4af66c691f4e5c2db9bb00793669a548e9db1974 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Fri, 13 Jul 2012 18:17:55 +0530 Subject: Bluetooth: Free the l2cap channel list only when refcount is zero Move the l2cap channel list chan->global_l under the refcnt protection and free it based on the refcnt. Signed-off-by: Jaganath Kanakkassery Signed-off-by: Syam Sidhardhan Reviewed-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/a2mp.c | 2 +- net/bluetooth/l2cap_core.c | 8 +++++--- net/bluetooth/l2cap_sock.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 79af661a58dd..0760d1fed6f0 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -416,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) static void a2mp_chan_close_cb(struct l2cap_chan *chan) { - l2cap_chan_destroy(chan); + l2cap_chan_put(chan); } static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 79923d8bbe97..9f8b29ef5b68 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -416,13 +416,15 @@ struct l2cap_chan *l2cap_chan_create(void) return chan; } -void l2cap_chan_destroy(struct l2cap_chan *chan) +static void l2cap_chan_destroy(struct l2cap_chan *chan) { + BT_DBG("chan %p", chan); + write_lock(&chan_list_lock); list_del(&chan->global_l); write_unlock(&chan_list_lock); - l2cap_chan_put(chan); + kfree(chan); } void l2cap_chan_hold(struct l2cap_chan *c) @@ -437,7 +439,7 @@ void l2cap_chan_put(struct l2cap_chan *c) BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); if (atomic_dec_and_test(&c->refcnt)) - kfree(c); + l2cap_chan_destroy(c); } void l2cap_chan_set_defaults(struct l2cap_chan *chan) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e8427e..79350d10087c 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -823,7 +823,7 @@ static void l2cap_sock_kill(struct sock *sk) /* Kill poor orphan */ - l2cap_chan_destroy(l2cap_pi(sk)->chan); + l2cap_chan_put(l2cap_pi(sk)->chan); sock_set_flag(sk, SOCK_DEAD); sock_put(sk); } -- cgit v1.2.1 From 256a06c8a85df676e80263af349daad1283e529e Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:26:32 +0900 Subject: Bluetooth: /proc/net/ entries for bluetooth protocols lsof command can tell the type of socket processes are using. Internal lsof uses inode numbers on socket fs to resolve the type of sockets. Files under /proc/net/, such as tcp, udp, unix, etc provides such inode information. Unfortunately bluetooth related protocols don't provide such inode information. This patch series introduces /proc/net files for the protocols. This patch against af_bluetooth.c provides facility to the implementation of protocols. This patch extends bt_sock_list and introduces two exported function bt_procfs_init, bt_procfs_cleanup. The type bt_sock_list is already used in some of implementation of protocols. bt_procfs_init prepare seq_operations which converts protocol own bt_sock_list data to protocol own proc entry when the entry is accessed. What I, lsof user, need is just inode number of bluetooth socket. However, people may want more information. The bt_procfs_init takes a function pointer for customizing the show handler of seq_operations. In v4 patch, __acquires and __releases attributes are added to suppress sparse warning. Suggested by Andrei Emeltchenko. In v5 patch, linux/proc_fs.h is included to use PDE. Build error is reported by Fengguang Wu. Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/af_bluetooth.c | 141 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f7db5792ec64..58f9762b339a 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -28,6 +28,7 @@ #include #include +#include #define VERSION "2.16" @@ -532,6 +533,146 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) } EXPORT_SYMBOL(bt_sock_wait_state); +#ifdef CONFIG_PROC_FS +struct bt_seq_state { + struct bt_sock_list *l; +}; + +static void *bt_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_lock(&l->lock); + return seq_hlist_start_head(&l->head, *pos); +} + +static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + return seq_hlist_next(v, &l->head, pos); +} + +static void bt_seq_stop(struct seq_file *seq, void *v) + __releases(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_unlock(&l->lock); +} + +static int bt_seq_show(struct seq_file *seq, void *v) +{ + struct sock *sk; + struct bt_sock *bt; + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + bdaddr_t src_baswapped, dst_baswapped; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Src Dst Parent"); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } else { + sk = sk_entry(v); + bt = bt_sk(sk); + baswap(&src_baswapped, &bt->src); + baswap(&dst_baswapped, &bt->dst); + + seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu", + sk, + atomic_read(&sk->sk_refcnt), + sk_rmem_alloc_get(sk), + sk_wmem_alloc_get(sk), + sock_i_uid(sk), + sock_i_ino(sk), + &src_baswapped, + &dst_baswapped, + bt->parent? sock_i_ino(bt->parent): 0LU); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } + return 0; +} + +static struct seq_operations bt_seq_ops = { + .start = bt_seq_start, + .next = bt_seq_next, + .stop = bt_seq_stop, + .show = bt_seq_show, +}; + +static int bt_seq_open(struct inode *inode, struct file *file) +{ + struct bt_sock_list *sk_list; + struct bt_seq_state *s; + + sk_list = PDE(inode)->data; + s = __seq_open_private(file, &bt_seq_ops, + sizeof(struct bt_seq_state)); + if (s == NULL) + return -ENOMEM; + + s->l = sk_list; + return 0; +} + +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + struct proc_dir_entry * pde; + + sk_list->custom_seq_show = seq_show; + + sk_list->fops.owner = module; + sk_list->fops.open = bt_seq_open; + sk_list->fops.read = seq_read; + sk_list->fops.llseek = seq_lseek; + sk_list->fops.release = seq_release_private; + + pde = proc_net_fops_create(net, name, 0, &sk_list->fops); + if (pde == NULL) + return -ENOMEM; + + pde->data = sk_list; + + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ + proc_net_remove(net, name); +} +#else +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ +} +#endif +EXPORT_SYMBOL(bt_procfs_init); +EXPORT_SYMBOL(bt_procfs_cleanup); + static struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, -- cgit v1.2.1 From 77cf5585a31fdad48c16ddae9be154f5afe22c1c Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:27:35 +0900 Subject: Bluetooth: Added /proc/net/bnep via bt_procfs_init() Added /proc/net/bnep via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/bnep/sock.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b410e0b..5b6cc0bf4dec 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -29,6 +29,10 @@ #include "bnep.h" +static struct bt_sock_list bnep_sk_list = { + .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock) +}; + static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -38,6 +42,8 @@ static int bnep_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&bnep_sk_list, sk); + sock_orphan(sk); sock_put(sk); return 0; @@ -204,6 +210,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&bnep_sk_list, sk); return 0; } @@ -222,19 +229,30 @@ int __init bnep_sock_init(void) return err; err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register BNEP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create BNEP proc file"); + bt_sock_unregister(BTPROTO_BNEP); + goto error; + } + + BT_INFO("BNEP socket layer initialized"); return 0; error: - BT_ERR("Can't register BNEP socket"); proto_unregister(&bnep_proto); return err; } void __exit bnep_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "bnep"); if (bt_sock_unregister(BTPROTO_BNEP) < 0) BT_ERR("Can't unregister BNEP socket"); -- cgit v1.2.1 From 8c8de589cedd375934d96e073d2b9077a00f10c2 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:28:07 +0900 Subject: Bluetooth: Added /proc/net/cmtp via bt_procfs_init() Added /proc/net/cmtp via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/cmtp/sock.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d14571..d5cacef52748 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -42,6 +42,10 @@ #include "cmtp.h" +static struct bt_sock_list cmtp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock) +}; + static int cmtp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -51,6 +55,8 @@ static int cmtp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&cmtp_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -214,6 +220,8 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&cmtp_sk_list, sk); + return 0; } @@ -232,19 +240,30 @@ int cmtp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register CMTP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create CMTP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("CMTP socket layer initialized"); return 0; error: - BT_ERR("Can't register CMTP socket"); proto_unregister(&cmtp_proto); return err; } void cmtp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "cmtp"); if (bt_sock_unregister(BTPROTO_CMTP) < 0) BT_ERR("Can't unregister CMTP socket"); -- cgit v1.2.1 From f7c8663789038c4bc71b81e3c858a35c999347a8 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:28:36 +0900 Subject: Bluetooth: Added /proc/net/hci via bt_procfs_init() Added /proc/net/hci via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_sock.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de03d79..7c3d6c7c6ddb 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1100,21 +1100,30 @@ int __init hci_sock_init(void) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("HCI socket registration failed"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HCI proc file"); + bt_sock_unregister(BTPROTO_HCI); + goto error; + } BT_INFO("HCI socket layer initialized"); return 0; error: - BT_ERR("HCI socket registration failed"); proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "hci"); if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); -- cgit v1.2.1 From 5c6ad8eee0a8c5fb4ba8b741008490da9eb66af6 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:29:00 +0900 Subject: Bluetooth: Added /proc/net/hidp via bt_procfs_init() Added /proc/net/hidp via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/hidp/sock.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f6892a36..eca3889371c4 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -25,6 +25,10 @@ #include "hidp.h" +static struct bt_sock_list hidp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(hidp_sk_list.lock) +}; + static int hidp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -34,6 +38,8 @@ static int hidp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&hidp_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -253,6 +259,8 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&hidp_sk_list, sk); + return 0; } @@ -271,8 +279,19 @@ int __init hidp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register HIDP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HIDP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("HIDP socket layer initialized"); return 0; @@ -284,6 +303,7 @@ error: void __exit hidp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "hidp"); if (bt_sock_unregister(BTPROTO_HIDP) < 0) BT_ERR("Can't unregister HIDP socket"); -- cgit v1.2.1 From 5b28d95c13e876037d2c692e61862bb3e98249af Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:29:25 +0900 Subject: Bluetooth: Added /proc/net/l2cap via bt_procfs_init() Added /proc/net/l2cap via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_sock.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 79350d10087c..13f6a9816feb 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -34,6 +34,10 @@ #include #include +static struct bt_sock_list l2cap_sk_list = { + .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) +}; + static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); @@ -886,6 +890,8 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&l2cap_sk_list, sk); + err = l2cap_sock_shutdown(sock, 2); sock_orphan(sk); @@ -1210,6 +1216,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; l2cap_sock_init(sk, NULL); + bt_sock_link(&l2cap_sk_list, sk); return 0; } @@ -1248,21 +1255,30 @@ int __init l2cap_init_sockets(void) return err; err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("L2CAP socket registration failed"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create L2CAP proc file"); + bt_sock_unregister(BTPROTO_L2CAP); + goto error; + } BT_INFO("L2CAP socket layer initialized"); return 0; error: - BT_ERR("L2CAP socket registration failed"); proto_unregister(&l2cap_proto); return err; } void l2cap_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "l2cap"); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); -- cgit v1.2.1 From c6f5df16a2710e64090078dfbaa86c54a27c0874 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:29:49 +0900 Subject: Bluetooth: Added /proc/net/rfcomm via bt_procfs_init() Added /proc/net/rfcomm via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/rfcomm/sock.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7e1e59645c05..260821a2d6e7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1033,8 +1033,17 @@ int __init rfcomm_init_sockets(void) return err; err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("RFCOMM socket layer registration failed"); + goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create RFCOMM proc file"); + bt_sock_unregister(BTPROTO_RFCOMM); goto error; + } if (bt_debugfs) { rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, @@ -1048,13 +1057,14 @@ int __init rfcomm_init_sockets(void) return 0; error: - BT_ERR("RFCOMM socket layer registration failed"); proto_unregister(&rfcomm_proto); return err; } void __exit rfcomm_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "rfcomm"); + debugfs_remove(rfcomm_sock_debugfs); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) -- cgit v1.2.1 From de9b9212c7fd3e87608194f1c73fc06cd6cc7dfb Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 26 Jul 2012 01:30:12 +0900 Subject: Bluetooth: Added /proc/net/sco via bt_procfs_init() Added /proc/net/sco via bt_procfs_init(). Signed-off-by: Masatake YAMATO Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 0ef5a78a889f..caa109df6452 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1023,6 +1023,13 @@ int __init sco_init(void) goto error; } + err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create SCO proc file"); + bt_sock_unregister(BTPROTO_SCO); + goto error; + } + if (bt_debugfs) { sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs, NULL, &sco_debugfs_fops); @@ -1041,6 +1048,8 @@ error: void __exit sco_exit(void) { + bt_procfs_cleanup(&init_net, "sco"); + debugfs_remove(sco_debugfs); if (bt_sock_unregister(BTPROTO_SCO) < 0) -- cgit v1.2.1 From 230fd16a2333fef9c012b054a127c7d157f7a7af Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:10 -0300 Subject: Bluetooth: Trivial refactoring This patch replaces the unlock-and-return statements by the goto statement. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0386e1e72275..e89d7f24136b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3364,8 +3364,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); if (!conn) { BT_ERR("No memory for new connection"); - hci_dev_unlock(hdev); - return; + goto unlock; } conn->dst_type = ev->bdaddr_type; -- cgit v1.2.1 From b9b343d25484bbceaee454ab422daafb1c5eda96 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:11 -0300 Subject: Bluetooth: Fix hci_le_conn_complete_evt We need to check the 'Role' parameter from the LE Connection Complete Event in order to properly set 'out' and 'link_mode' fields from hci_conn structure. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e89d7f24136b..8b13cccd50cd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3368,6 +3368,11 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } conn->dst_type = ev->bdaddr_type; + + if (ev->role == LE_CONN_ROLE_MASTER) { + conn->out = true; + conn->link_mode |= HCI_LM_MASTER; + } } if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) -- cgit v1.2.1 From 847012c5e04544aef485dfec29c1c07dc90615a4 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:12 -0300 Subject: Bluetooth: Remove unneeded code This patch removes some unneeded code from hci_cs_le_create_conn. If the hci_conn is not found, it means this LE connection attempt was triggered by a thrid-party tool (e.g. hcitool). We should not create this new hci_conn in LE Create Connection command status event since it is already properly handled in LE Connection Complete event. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8b13cccd50cd..27064beda10d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1638,16 +1638,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) hci_proto_connect_cfm(conn, status); hci_conn_del(conn); } - } else { - if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); - if (conn) { - conn->dst_type = cp->peer_addr_type; - conn->out = true; - } else { - BT_ERR("No memory for new connection"); - } - } } hci_dev_unlock(hdev); -- cgit v1.2.1 From f00a06ac14becc3d78fecdf2513cc23ee267a96b Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:13 -0300 Subject: Bluetooth: Refactor hci_cs_le_create_conn This patch does some code refactoring in hci_cs_le_create_conn function. The hci_conn object is only needed in case of failure, therefore hdev locking and hci_conn lookup were moved to if-statement scope. Also, the conn->state check was removed since we should always close the connection if it fails. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 27064beda10d..c0aa9f436998 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1623,24 +1623,26 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) if (!cp) return; - hci_dev_lock(hdev); + if (status) { + hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + if (!conn) { + hci_dev_unlock(hdev); + return; + } - BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), - conn); + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), + conn); - if (status) { - if (conn && conn->state == BT_CONNECT) { - conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &cp->peer_addr, conn->type, - conn->dst_type, status); - hci_proto_connect_cfm(conn, status); - hci_conn_del(conn); - } - } + conn->state = BT_CLOSED; + mgmt_connect_failed(hdev, &cp->peer_addr, conn->type, + conn->dst_type, status); + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); - hci_dev_unlock(hdev); + hci_dev_unlock(hdev); + } } static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) -- cgit v1.2.1 From 0c95ab78be36e56ca69e36bc679f9dfd3d25f31e Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:14 -0300 Subject: Bluetooth: Find hci_conn by BT_CONNECT state This patch changes hci_cs_le_create_conn to perform hci_conn lookup by state instead of bdaddr. Since we can have only one LE connection in BT_CONNECT state, we can perform LE hci_conn lookup by state. This way, we don't rely on hci_sent_cmd_data helper to find the hci_conn object. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c0aa9f436998..8dc1f0fbe9c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1614,29 +1614,24 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) { - struct hci_cp_le_create_conn *cp; struct hci_conn *conn; BT_DBG("%s status 0x%2.2x", hdev->name, status); - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN); - if (!cp) - return; - if (status) { hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (!conn) { hci_dev_unlock(hdev); return; } - BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&conn->dst), conn); conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &cp->peer_addr, conn->type, + mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); hci_proto_connect_cfm(conn, status); hci_conn_del(conn); -- cgit v1.2.1 From b47a09b33a4612ace2958996ce6e0134be23d043 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:15 -0300 Subject: Bluetooth: Lookup hci_conn in hci_le_conn_complete_evt This patch does a trivial code refactoring in hci_conn lookup in hci_le_conn_complete_evt. It performs the hci_conn lookup at the begining of the function since it is used by both flows (error and success). Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8dc1f0fbe9c1..92522b470f6b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3333,8 +3333,9 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (ev->status) { - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (!conn) goto unlock; @@ -3346,7 +3347,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; } - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); if (!conn) { conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); if (!conn) { -- cgit v1.2.1 From cd17decbd9af41c9548bb108ccf156519f8253ec Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jul 2012 15:10:16 -0300 Subject: Bluetooth: Refactor in hci_le_conn_complete_evt This patch moves the hci_conn check to begining of hci_le_conn_ complete_evt in order to improve code's readability and better error handling. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 92522b470f6b..32e21ad36a68 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3334,19 +3334,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - - if (ev->status) { - if (!conn) - goto unlock; - - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); - hci_proto_connect_cfm(conn, ev->status); - conn->state = BT_CLOSED; - hci_conn_del(conn); - goto unlock; - } - if (!conn) { conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); if (!conn) { @@ -3362,6 +3349,15 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } } + if (ev->status) { + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, ev->status); + hci_proto_connect_cfm(conn, ev->status); + conn->state = BT_CLOSED; + hci_conn_del(conn); + goto unlock; + } + if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) mgmt_device_connected(hdev, &ev->bdaddr, conn->type, conn->dst_type, 0, NULL, 0, NULL); -- cgit v1.2.1 From 269c4845d5b3627b95b1934107251bacbe99bb68 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 15 Jun 2012 02:30:20 -0300 Subject: Bluetooth: Fix possible deadlock in SCO code sco_chan_del() only has conn != NULL when called from sco_conn_del() so just move the code from it that deal with conn to sco_conn_del(). [ 120.765529] [ 120.765529] ====================================================== [ 120.766529] [ INFO: possible circular locking dependency detected ] [ 120.766529] 3.5.0-rc1-10292-g3701f94-dirty #70 Tainted: G W [ 120.766529] ------------------------------------------------------- [ 120.766529] kworker/u:3/1497 is trying to acquire lock: [ 120.766529] (&(&conn->lock)->rlock#2){+.+...}, at: [] sco_chan_del+0x4c/0x170 [bluetooth] [ 120.766529] [ 120.766529] but task is already holding lock: [ 120.766529] (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}, at: [] sco_conn_del+0x61/0xe0 [bluetooth] [ 120.766529] [ 120.766529] which lock already depends on the new lock. [ 120.766529] [ 120.766529] [ 120.766529] the existing dependency chain (in reverse order) is: [ 120.766529] [ 120.766529] -> #1 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}: [ 120.766529] [] lock_acquire+0x8e/0xb0 [ 120.766529] [] _raw_spin_lock+0x40/0x80 [ 120.766529] [] sco_connect_cfm+0x79/0x300 [bluetooth] [ 120.766529] [] hci_sync_conn_complete_evt.isra.90+0x343/0x400 [bluetooth] [ 120.766529] [] hci_event_packet+0x317/0xfb0 [bluetooth] [ 120.766529] [] hci_rx_work+0x2c8/0x890 [bluetooth] [ 120.766529] [] process_one_work+0x197/0x460 [ 120.766529] [] worker_thread+0x126/0x2d0 [ 120.766529] [] kthread+0x9d/0xb0 [ 120.766529] [] kernel_thread_helper+0x4/0x10 [ 120.766529] [ 120.766529] -> #0 (&(&conn->lock)->rlock#2){+.+...}: [ 120.766529] [] __lock_acquire+0x154a/0x1d30 [ 120.766529] [] lock_acquire+0x8e/0xb0 [ 120.766529] [] _raw_spin_lock+0x40/0x80 [ 120.766529] [] sco_chan_del+0x4c/0x170 [bluetooth] [ 120.766529] [] sco_conn_del+0x74/0xe0 [bluetooth] [ 120.766529] [] sco_disconn_cfm+0x32/0x60 [bluetooth] [ 120.766529] [] hci_disconn_complete_evt.isra.53+0x242/0x390 [bluetooth] [ 120.766529] [] hci_event_packet+0x617/0xfb0 [bluetooth] [ 120.766529] [] hci_rx_work+0x2c8/0x890 [bluetooth] [ 120.766529] [] process_one_work+0x197/0x460 [ 120.766529] [] worker_thread+0x126/0x2d0 [ 120.766529] [] kthread+0x9d/0xb0 [ 120.766529] [] kernel_thread_helper+0x4/0x10 [ 120.766529] [ 120.766529] other info that might help us debug this: [ 120.766529] [ 120.766529] Possible unsafe locking scenario: [ 120.766529] [ 120.766529] CPU0 CPU1 [ 120.766529] ---- ---- [ 120.766529] lock(slock-AF_BLUETOOTH-BTPROTO_SCO); [ 120.766529] lock(&(&conn->lock)->rlock#2); [ 120.766529] lock(slock-AF_BLUETOOTH-BTPROTO_SCO); [ 120.766529] lock(&(&conn->lock)->rlock#2); [ 120.766529] [ 120.766529] *** DEADLOCK *** Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25dcff7..3589e21edb09 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err) sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); + + sco_conn_lock(conn); + conn->sk = NULL; + sco_pi(sk)->conn = NULL; + sco_conn_unlock(conn); + + if (conn->hcon) + hci_conn_put(conn->hcon); + sco_sock_kill(sk); } @@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err) BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - if (conn) { - sco_conn_lock(conn); - conn->sk = NULL; - sco_pi(sk)->conn = NULL; - sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_put(conn->hcon); - } - sk->sk_state = BT_CLOSED; sk->sk_err = err; sk->sk_state_change(sk); -- cgit v1.2.1 From a9ea3ed9b71cc3271dd59e76f65748adcaa76422 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 19 Jul 2012 14:46:08 +0200 Subject: Bluetooth: Fix legacy pairing with some devices Some devices e.g. some Android based phones don't do SDP search before pairing and cancel legacy pairing when ACL is disconnected. PIN Code Request event which changes ACL timeout to HCI_PAIRING_TIMEOUT is only received after remote user entered PIN. In that case no L2CAP is connected so default HCI_DISCONN_TIMEOUT (2 seconds) is being used to timeout ACL connection. This results in problems with legacy pairing as remote user has only few seconds to enter PIN before ACL is disconnected. Increase disconnect timeout for incomming connection to HCI_PAIRING_TIMEOUT if SSP is disabled and no linkey exists. To avoid keeping ACL alive for too long after SDP search set ACL timeout back to HCI_DISCONN_TIMEOUT when L2CAP is connected. 2012-07-19 13:24:43.413521 < HCI Command: Create Connection (0x01|0x0005) plen 13 bdaddr 00:02:72:D6:6A:3F ptype 0xcc18 rswitch 0x01 clkoffset 0x0000 Packet type: DM1 DM3 DM5 DH1 DH3 DH5 2012-07-19 13:24:43.425224 > HCI Event: Command Status (0x0f) plen 4 Create Connection (0x01|0x0005) status 0x00 ncmd 1 2012-07-19 13:24:43.885222 > HCI Event: Role Change (0x12) plen 8 status 0x00 bdaddr 00:02:72:D6:6A:3F role 0x01 Role: Slave 2012-07-19 13:24:44.054221 > HCI Event: Connect Complete (0x03) plen 11 status 0x00 handle 42 bdaddr 00:02:72:D6:6A:3F type ACL encrypt 0x00 2012-07-19 13:24:44.054313 < HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2 handle 42 2012-07-19 13:24:44.055176 > HCI Event: Page Scan Repetition Mode Change (0x20) plen 7 bdaddr 00:02:72:D6:6A:3F mode 0 2012-07-19 13:24:44.056217 > HCI Event: Max Slots Change (0x1b) plen 3 handle 42 slots 5 2012-07-19 13:24:44.059218 > HCI Event: Command Status (0x0f) plen 4 Read Remote Supported Features (0x01|0x001b) status 0x00 ncmd 0 2012-07-19 13:24:44.062192 > HCI Event: Command Status (0x0f) plen 4 Unknown (0x00|0x0000) status 0x00 ncmd 1 2012-07-19 13:24:44.067219 > HCI Event: Read Remote Supported Features (0x0b) plen 11 status 0x00 handle 42 Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87 2012-07-19 13:24:44.067248 < HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3 handle 42 page 1 2012-07-19 13:24:44.071217 > HCI Event: Command Status (0x0f) plen 4 Read Remote Extended Features (0x01|0x001c) status 0x00 ncmd 1 2012-07-19 13:24:44.076218 > HCI Event: Read Remote Extended Features (0x23) plen 13 status 0x00 handle 42 page 1 max 1 Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 2012-07-19 13:24:44.076249 < HCI Command: Remote Name Request (0x01|0x0019) plen 10 bdaddr 00:02:72:D6:6A:3F mode 2 clkoffset 0x0000 2012-07-19 13:24:44.081218 > HCI Event: Command Status (0x0f) plen 4 Remote Name Request (0x01|0x0019) status 0x00 ncmd 1 2012-07-19 13:24:44.105214 > HCI Event: Remote Name Req Complete (0x07) plen 255 status 0x00 bdaddr 00:02:72:D6:6A:3F name 'uw000951-0' 2012-07-19 13:24:44.105284 < HCI Command: Authentication Requested (0x01|0x0011) plen 2 handle 42 2012-07-19 13:24:44.111207 > HCI Event: Command Status (0x0f) plen 4 Authentication Requested (0x01|0x0011) status 0x00 ncmd 1 2012-07-19 13:24:44.112220 > HCI Event: Link Key Request (0x17) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.112249 < HCI Command: Link Key Request Negative Reply (0x01|0x000c) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.115215 > HCI Event: Command Complete (0x0e) plen 10 Link Key Request Negative Reply (0x01|0x000c) ncmd 1 status 0x00 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.116215 > HCI Event: PIN Code Request (0x16) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:48.099184 > HCI Event: Auth Complete (0x06) plen 3 status 0x13 handle 42 Error: Remote User Terminated Connection 2012-07-19 13:24:48.179182 > HCI Event: Disconn Complete (0x05) plen 4 status 0x00 handle 42 reason 0x13 Reason: Remote User Terminated Connection Cc: stable@vger.kernel.org Signed-off-by: Szymon Janc Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 7 ++++++- net/bluetooth/l2cap_core.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978a33f9..248632cec11d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1762,7 +1762,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); - conn->disc_timeout = HCI_DISCONN_TIMEOUT; + + if (!conn->out && !hci_conn_ssp_enabled(conn) && + !hci_find_link_key(hdev, &ev->bdaddr)) + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + else + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db04bfb..daa149b7003c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1181,6 +1181,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) sk = chan->sk; hci_conn_hold(conn->hcon); + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); -- cgit v1.2.1 From c810089c27e48b816181b454fcc493d19fdbc2ba Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:09 +0300 Subject: Bluetooth: Fix using NULL inquiry entry If entry wasn't found in the hci_inquiry_cache_lookup_resolve do not resolve the name.This will fix a kernel crash when trying to use NULL pointer. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 248632cec11d..b64cfa213bd6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) return false; e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); + if (!e) + return false; + if (hci_resolve_name(hdev, e) == 0) { e->name_state = NAME_PENDING; return true; -- cgit v1.2.1 From 7cc8380eb10347016d95bf6f9d842c2ae6d12932 Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:10 +0300 Subject: Bluetooth: Fix using a NULL inquiry cache entry If the device was not found in a list of found devices names of which are pending.This may happen in a case when HCI Remote Name Request was sent as a part of incoming connection establishment procedure. Hence there is no need to continue resolving a next name as it will be done upon receiving another Remote Name Request Complete Event. This will fix a kernel crash when trying to use this entry to resolve the next name. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b64cfa213bd6..fe9a3d6d30b0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1396,12 +1396,18 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, return; e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); - if (e) { + /* If the device was not found in a list of found devices names of which + * are pending. there is no need to continue resolving a next name as it + * will be done upon receiving another Remote Name Request Complete + * Event */ + if (!e) + return; + + list_del(&e->list); + if (name) { e->name_state = NAME_KNOWN; - list_del(&e->list); - if (name) - mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, - e->data.rssi, name, name_len); + mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, + e->data.rssi, name, name_len); } if (hci_resolve_next_name(hdev)) -- cgit v1.2.1 From c3e7c0d90b14a3e7ac091d24cef09efb516d587b Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:11 +0300 Subject: Bluetooth: Set name_state to unknown when entry name is empty When the name of the given entry is empty , the state needs to be updated accordingly. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fe9a3d6d30b0..715d7e33fba0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1408,6 +1408,8 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, e->name_state = NAME_KNOWN; mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, e->data.rssi, name, name_len); + } else { + e->name_state = NAME_NOT_KNOWN; } if (hci_resolve_next_name(hdev)) -- cgit v1.2.1 From d08fd0e712a834d4abb869c0215a702e290bc51e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:43 +0300 Subject: Bluetooth: smp: Fix possible NULL dereference smp_chan_create might return NULL so we need to check before dereferencing smp. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 16ef0dc85a0a..901a616c8083 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp = smp_chan_create(conn); + else + smp = conn->smp_chan; - smp = conn->smp_chan; + if (!smp) + return SMP_UNSPECIFIED; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); -- cgit v1.2.1 From 49dfbb9129c4edb318578de35cc45c555df37884 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Thu, 19 Jul 2012 12:54:04 +0530 Subject: Bluetooth: Fix socket not getting freed if l2cap channel create fails If l2cap_chan_create() fails then it will return from l2cap_sock_kill since zapped flag of sk is reset. Signed-off-by: Jaganath Kanakkassery Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e8427e..b94abd30e6f9 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1174,7 +1174,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p chan = l2cap_chan_create(); if (!chan) { - l2cap_sock_kill(sk); + sk_free(sk); return NULL; } -- cgit v1.2.1 From a9e050f4e7f9d36afe0dcc0bddba864ee442715e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Aug 2012 22:34:50 +0000 Subject: net: tcp: GRO should be ECN friendly While doing TCP ECN tests, I discovered GRO was reordering packets if it receives one packet with CE set, while previous packets in same NAPI run have ECT(0) for the same flow : 09:25:25.857620 IP (tos 0x2,ECT(0), ttl 64, id 27893, offset 0, flags [DF], proto TCP (6), length 4396) 172.30.42.19.54550 > 172.30.42.13.44139: Flags [.], seq 233801:238145, ack 1, win 115, options [nop,nop,TS val 3397779 ecr 1990627], length 4344 09:25:25.857626 IP (tos 0x3,CE, ttl 64, id 27892, offset 0, flags [DF], proto TCP (6), length 1500) 172.30.42.19.54550 > 172.30.42.13.44139: Flags [.], seq 232353:233801, ack 1, win 115, options [nop,nop,TS val 3397779 ecr 1990627], length 1448 09:25:25.857638 IP (tos 0x0, ttl 64, id 34581, offset 0, flags [DF], proto TCP (6), length 64) 172.30.42.13.44139 > 172.30.42.19.54550: Flags [.], cksum 0xac8f (incorrect -> 0xca69), ack 232353, win 1271, options [nop,nop,TS val 1990627 ecr 3397779,nop,nop,sack 1 {233801:238145}], length 0 We have two problems here : 1) GRO reorders packets If NIC gave packet1, then packet2, which happen to be from "different flows" GRO feeds stack with packet2, then packet1. I have yet to understand how to solve this problem. 2) GRO is not ECN friendly Delivering packets out of order makes TCP stack not as fast as it could be. In this patch I suggest we make the tos test not part of the 'same_flow' determination, but part of the 'should flush' logic Signed-off-by: Eric Dumazet Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe4582ca969a..6681ccf5c3ee 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1364,7 +1364,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (*(u8 *)iph != 0x45) goto out_unlock; - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out_unlock; id = ntohl(*(__be32 *)&iph->id); @@ -1380,7 +1380,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, iph2 = ip_hdr(p); if ((iph->protocol ^ iph2->protocol) | - (iph->tos ^ iph2->tos) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; @@ -1390,6 +1389,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, /* All fields must match except length and checksum. */ NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | + (iph->tos ^ iph2->tos) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; -- cgit v1.2.1 From aae06bf5f90403554f8d4ff83810a8281aef7f03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Aug 2012 11:04:43 +0000 Subject: tcp: ecn: dont delay ACKS after CE While playing with CoDel and ECN marking, I discovered a non optimal behavior of receiver of CE (Congestion Encountered) segments. In pathological cases, sender has reduced its cwnd to low values, and receiver delays its ACK (by 40 ms). While RFC 3168 6.1.3 (The TCP Receiver) doesn't explicitly recommend to send immediate ACKS, we believe its better to not delay ACKS, because a CE segment should give same signal than a dropped segment, and its quite important to reduce RTT to give ECE/CWR signals as fast as possible. Note we already call tcp_enter_quickack_mode() from TCP_ECN_check_ce() if we receive a retransmit, for the same reason. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2fd2bc9e3c64..fa2c2c2cac2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s tcp_enter_quickack_mode((struct sock *)tp); break; case INET_ECN_CE: - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { + /* Better not delay acks, sender can have a very low cwnd */ + tcp_enter_quickack_mode((struct sock *)tp); + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + } /* fallinto */ default: tp->ecn_flags |= TCP_ECN_SEEN; -- cgit v1.2.1 From 4185392da4b4b494e51934c51b999b4df424afba Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 6 Aug 2012 15:49:47 -0700 Subject: openvswitch: Relax set header validation. When installing a flow with an action to set a particular field we need to validate that the packets that are part of the flow actually contain that header. With IP we use zeroed addresses and with TCP/UDP the check is for zeroed ports. This check is overly broad and can catch packets like DHCP requests that have a zero source address in a legitimate header. This changes the check to look for a zeroed protocol number for IP or for both ports be zero for TCP/UDP before considering the header to not exist. Reported-by: Ethan Jackson Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8277d29e710..cf58cedad083 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -425,10 +425,10 @@ static int validate_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src && flow_key->ipv4.tp.dst) + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) return 0; } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src && flow_key->ipv6.tp.dst) + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) return 0; } @@ -460,7 +460,7 @@ static int validate_set(const struct nlattr *a, if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst) + if (!flow_key->ip.proto) return -EINVAL; ipv4_key = nla_data(ovs_key); -- cgit v1.2.1 From 79cda75a107da0d49732b5cb642b456264dd7e0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 10:45:47 +0000 Subject: fib: use __fls() on non null argument __fls(x) is a bit faster than fls(x), granted we know x is non null. As Ben Hutchings pointed out, fls(x) = __fls(x) + 1 Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f0cdb30921c0..f84a0e90d675 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1550,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * state.directly. */ if (pref_mismatch) { - int mp = KEYLENGTH - fls(pref_mismatch); + /* fls(x) = __fls(x) + 1 */ + int mp = KEYLENGTH - __fls(pref_mismatch) - 1; if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) goto backtrace; -- cgit v1.2.1 From a399a8053164ec8bcb06fed52be9941a26ecde11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Aug 2012 21:13:53 +0000 Subject: time: jiffies_delta_to_clock_t() helper to the rescue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various /proc/net files sometimes report crazy timer values, expressed in clock_t units. This happens when an expired timer delta (expires - jiffies) is passed to jiffies_to_clock_t(). This function has an overflow in : return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ); commit cbbc719fccdb8cb (time: Change jiffies_to_clock_t() argument type to unsigned long) only got around the problem. As we cant output negative values in /proc/net/tcp without breaking various tools, I suggest adding a jiffies_delta_to_clock_t() wrapper that caps the negative delta to a 0 value. Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Thomas Gleixner Cc: Paul Gortmaker Cc: Andrew Morton Cc: hank Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- net/bridge/br_stp_timer.c | 2 +- net/core/rtnetlink.c | 2 +- net/ipv4/igmp.c | 7 +++++-- net/ipv4/tcp_ipv4.c | 13 +++++-------- net/ipv6/tcp_ipv6.c | 9 +++------ 6 files changed, 16 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d21f32383517..9ce430b4657c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index a6747e673426..c3530a81a33b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p) unsigned long br_timer_value(const struct timer_list *timer) { return timer_pending(timer) - ? jiffies_to_clock_t(timer->expires - jiffies) : 0; + ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c5a0a06c4ce..db037c9a4c48 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -618,7 +618,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), .rta_used = dst->__use, .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6699f23e6f55..0b5580c69f2d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2435,6 +2435,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) struct ip_mc_list *im = (struct ip_mc_list *)v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; + long delta; + #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : @@ -2448,11 +2450,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } + delta = im->timer.expires - jiffies; seq_printf(seq, "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, im->tm_running ? - jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->tm_running, + im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..c660d2c19a2b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2385,7 +2385,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); - int ttd = req->expires - jiffies; + long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", @@ -2397,7 +2397,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ - jiffies_to_clock_t(ttd), + jiffies_delta_to_clock_t(delta), req->retrans, uid, 0, /* non standard timer */ @@ -2448,7 +2448,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->write_seq - tp->snd_una, rx_queue, timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sk), icsk->icsk_probes_out, @@ -2467,10 +2467,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = tw->tw_daddr; src = tw->tw_rcv_saddr; @@ -2480,7 +2477,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw, len); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..aa41b0e6b163 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1875,7 +1875,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), icsk->icsk_probes_out, @@ -1895,10 +1895,7 @@ static void get_timewait6_sock(struct seq_file *seq, const struct in6_addr *dest, *src; __u16 destp, srcp; const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = &tw6->tw_v6_daddr; src = &tw6->tw_v6_rcv_saddr; @@ -1914,7 +1911,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw); } -- cgit v1.2.1 From 9c7dafbfab1554705f85523fead578aa1a3d338c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 8 Aug 2012 21:52:46 +0000 Subject: net: Allow to create links with given ifindex Currently the RTM_NEWLINK results in -EOPNOTSUPP if the ifinfomsg->ifi_index is not zero. I propose to allow requesting ifindices on link creation. This is required by the checkpoint-restore to correctly restore a net namespace (i.e. -- a container). Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++++- net/core/rtnetlink.c | 12 +++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f91abf800161..3ca300d85271 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5579,7 +5579,12 @@ int register_netdevice(struct net_device *dev) } } - dev->ifindex = dev_new_index(net); + ret = -EBUSY; + if (!dev->ifindex) + dev->ifindex = dev_new_index(net); + else if (__dev_get_by_index(net, dev->ifindex)) + goto err_uninit; + if (dev->iflink == -1) dev->iflink = dev->ifindex; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index db037c9a4c48..34d975b0f277 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1812,8 +1812,6 @@ replay: return -ENODEV; } - if (ifm->ifi_index) - return -EOPNOTSUPP; if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; @@ -1839,10 +1837,14 @@ replay: return PTR_ERR(dest_net); dev = rtnl_create_link(net, dest_net, ifname, ops, tb); - - if (IS_ERR(dev)) + if (IS_ERR(dev)) { err = PTR_ERR(dev); - else if (ops->newlink) + goto out; + } + + dev->ifindex = ifm->ifi_index; + + if (ops->newlink) err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); -- cgit v1.2.1 From aa79e66eee5d525e2fcbd2a5fcb87ae3dd4aa9e9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 8 Aug 2012 21:53:19 +0000 Subject: net: Make ifindex generation per-net namespace Strictly speaking this is only _really_ required for checkpoint-restore to make loopback device always have the same index. This change appears to be safe wrt "ifindex should be unique per-system" concept, as all the ifindex usage is either already made per net namespace of is explicitly limited with init_net only. There are two cool side effects of this. The first one -- ifindices of devices in container are always small, regardless of how many containers we've started (and re-started) so far. The second one is -- we can speed up the loopback ifidex access as shown in the next patch. v2: Place ifindex right after dev_base_seq : avoid two holes and use the same cache line, dirtied in list_netdevice()/unlist_netdevice() Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3ca300d85271..1f06df8d10a3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5221,12 +5221,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ static int dev_new_index(struct net *net) { - static int ifindex; + int ifindex = net->ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; if (!__dev_get_by_index(net, ifindex)) - return ifindex; + return net->ifindex = ifindex; } } -- cgit v1.2.1 From 1fb9489bf190ce2b3fc03891f3de4b2d30600e28 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 8 Aug 2012 21:53:36 +0000 Subject: net: Loopback ifindex is constant now As pointed out, there are places, that access net->loopback_dev->ifindex and after ifindex generation is made per-net this value becomes constant equals 1. So go ahead and introduce the LOOPBACK_IFINDEX constant and use it where appropriate. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 6 +++--- net/ipv4/fib_frontend.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv6/route.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c8..c855e8d0738f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o .saddr = oldflp->saddr, .flowidn_scope = RT_SCOPE_UNIVERSE, .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_iif = LOOPBACK_IFINDEX, .flowidn_oif = oldflp->flowidn_oif, }; struct dn_route *rt = NULL; @@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_mark, LOOPBACK_IFINDEX, oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ @@ -1042,7 +1042,7 @@ source_ok: if (!fld.daddr) goto out; } - fld.flowidn_oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; goto make_route; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3fba792..7f073a38c87d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { fl4.flowi4_oif = 0; - fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.daddr = ip_hdr(skb)->saddr; fl4.saddr = 0; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..3a57570c8ee5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1798,7 +1798,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? - net->loopback_dev->ifindex : + LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 31371be8174b..c30130062cd6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return ipv4_is_local_multicast(iph->daddr) ^ invert; flow.flowi4_iif = 0; } else { - flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + flow.flowi4_iif = LOOPBACK_IFINDEX; } flow.daddr = iph->saddr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21ad369014c0..c58137391a3d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1619,7 +1619,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - net->loopback_dev->ifindex, + LOOPBACK_IFINDEX, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; @@ -1895,7 +1895,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = net->loopback_dev->ifindex; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -1984,7 +1984,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl4->flowi4_oif = net->loopback_dev->ifindex; + fl4->flowi4_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd279100..0ddf2d132e7f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -965,7 +965,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, { int flags = 0; - fl6->flowi6_iif = net->loopback_dev->ifindex; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; -- cgit v1.2.1 From aaea4ed74d71dda1cf2cc19c206552d537201452 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Jul 2012 20:32:12 +0300 Subject: ipvs: ip_vs_ftp depends on nf_conntrack_ftp helper The FTP application indirectly depends on the nf_conntrack_ftp helper for proper NAT support. If the module is not loaded, IPVS can resize the packets for the command connection, eg. PASV response but the SEQ adjustment logic in ipv4_confirm is not called without helper. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index f9871385a65e..8b2cffdfdd99 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -250,7 +250,8 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT + depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \ + NF_CONNTRACK_FTP select IP_VS_NFCT ---help--- FTP is a protocol that transfers IP address and/or port number in -- cgit v1.2.1 From be97fdb5fbcc828240c51769cd28cba609158703 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 12 Jul 2012 23:06:20 +0300 Subject: ipvs: generalize app registration in netns Get rid of the ftp_app pointer and allow applications to be registered without adding fields in the netns_ipvs structure. v2: fix coding style as suggested by Pablo Neira Ayuso Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 58 ++++++++++++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_ftp.c | 21 ++++----------- 2 files changed, 47 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 64f9e8f13207..9713e6e86d47 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, } -/* - * ip_vs_app registration routine - */ -int register_ip_vs_app(struct net *net, struct ip_vs_app *app) +/* Register application for netns */ +struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) { struct netns_ipvs *ipvs = net_ipvs(net); - /* increase the module use count */ - ip_vs_use_count_inc(); + struct ip_vs_app *a; + int err = 0; + + if (!ipvs) + return ERR_PTR(-ENOENT); mutex_lock(&__ip_vs_app_mutex); - list_add(&app->a_list, &ipvs->app_list); + list_for_each_entry(a, &ipvs->app_list, a_list) { + if (!strcmp(app->name, a->name)) { + err = -EEXIST; + goto out_unlock; + } + } + a = kmemdup(app, sizeof(*app), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out_unlock; + } + INIT_LIST_HEAD(&a->incs_list); + list_add(&a->a_list, &ipvs->app_list); + /* increase the module use count */ + ip_vs_use_count_inc(); +out_unlock: mutex_unlock(&__ip_vs_app_mutex); - return 0; + return err ? ERR_PTR(err) : a; } @@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct ip_vs_app *inc, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_app *a, *anxt, *inc, *nxt; + + if (!ipvs) + return; mutex_lock(&__ip_vs_app_mutex); - list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); - } + list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { + if (app && strcmp(app->name, a->name)) + continue; + list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { + ip_vs_app_inc_release(net, inc); + } - list_del(&app->a_list); + list_del(&a->a_list); + kfree(a); - mutex_unlock(&__ip_vs_app_mutex); + /* decrease the module use count */ + ip_vs_use_count_dec(); + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + mutex_unlock(&__ip_vs_app_mutex); } @@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net) void __net_exit ip_vs_app_net_cleanup(struct net *net) { + unregister_ip_vs_app(net, NULL /* all */); proc_net_remove(net, "ip_vs_app"); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index b20b29c903ef..ad70b7e4ac4a 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); - if (!app) - return -ENOMEM; - INIT_LIST_HEAD(&app->a_list); - INIT_LIST_HEAD(&app->incs_list); - ipvs->ftp_app = app; - ret = register_ip_vs_app(net, app); - if (ret) - goto err_exit; + app = register_ip_vs_app(net, &ip_vs_ftp); + if (IS_ERR(app)) + return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) @@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, app); -err_exit: - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); return ret; } /* @@ -474,10 +466,7 @@ err_exit: */ static void __ip_vs_ftp_exit(struct net *net) { - struct netns_ipvs *ipvs = net_ipvs(net); - - unregister_ip_vs_app(net, ipvs->ftp_app); - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { -- cgit v1.2.1 From 2b2d280817bd576e97ccd243b9b3a344d11ddd11 Mon Sep 17 00:00:00 2001 From: Claudiu Ghioc Date: Wed, 18 Jul 2012 12:10:22 +0300 Subject: ipvs: fixed sparse warning Removed the following sparse warnings, wether CONFIG_SYSCTL is defined or not: * warning: symbol 'ip_vs_control_net_init_sysctl' was not declared. Should it be static? * warning: symbol 'ip_vs_control_net_cleanup_sysctl' was not declared. Should it be static? Signed-off-by: Claudiu Ghioc Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dda194b..d6d5ccabe067 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3675,7 +3675,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3743,7 +3743,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3754,8 +3754,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif -- cgit v1.2.1 From f2edb9f7706dcb2c0d9a362b2ba849efe3a97f5e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 20 Jul 2012 11:59:52 +0300 Subject: ipvs: implement passive PMTUD for IPIP packets IPVS is missing the logic to update PMTU in routing for its IPIP packets. We monitor the dst_mtu and can return FRAG_NEEDED messages but if the tunneled packets get ICMP error we can not rely on other traffic to save the lowest MTU. The following patch adds ICMP handling for IPIP packets in incoming direction, from some remote host to our local IP used as saddr in the outer header. By this way we can forward any related ICMP traffic if it is for IPVS TUN connection. For the special case of PMTUD we update the routing and if client requested DF we can forward the error. To properly update the routing we have to bind the cached route (dest->dst_cache) to the selected saddr because ipv4_update_pmtu uses saddr for dst lookup. Add IP_VS_RT_MODE_CONNECT flag to force such binding with second route. Update ip_vs_tunnel_xmit to provide IP_VS_RT_MODE_CONNECT and change the code to copy DF. For now we prefer not to force PMTU discovery (outer DF=1) because we don't have configuration option to enable or disable PMTUD. As we do not keep any packets to resend, we prefer not to play games with packets without DF bit because the sender is not informed when they are rejected. Also, change ops->update_pmtu to be called only for local clients because there is no point to update MTU for input routes, in our case skb->dst->dev is lo. It seems the code is copied from ipip.c where the skb dst points to tunnel device. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 76 +++++++++++++++++++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_xmit.c | 79 ++++++++++++++++++++++++++++------------- 2 files changed, 128 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b54eccef40b5..58918e20f9d5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, ihl, verdict; + unsigned int offset, offset2, ihl, verdict; + bool ipip; *related = 1; @@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) net = skb_net(skb); + /* Special case for errors for IPIP packets */ + ipip = false; + if (cih->protocol == IPPROTO_IPIP) { + if (unlikely(cih->frag_off & htons(IP_OFFSET))) + return NF_ACCEPT; + /* Error for our IPIP must arrive at LOCAL_IN */ + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) + return NF_ACCEPT; + offset += cih->ihl * 4; + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + ipip = true; + } + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking incoming ICMP for"); + offset2 = offset; offset += cih->ihl * 4; ip_vs_fill_iphdr(AF_INET, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); + /* The embedded headers contain source and dest in reverse order. + * For IPIP this is error for request, not for reply. + */ + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto out; } + if (ipip) { + __be32 info = ic->un.gateway; + + /* Update the MTU */ + if (ic->type == ICMP_DEST_UNREACH && + ic->code == ICMP_FRAG_NEEDED) { + struct ip_vs_dest *dest = cp->dest; + u32 mtu = ntohs(ic->un.frag.mtu); + + /* Strip outer IP and ICMP, go to IPIP header */ + __skb_pull(skb, ihl + sizeof(_icmph)); + offset2 -= ihl + sizeof(_icmph); + skb_reset_network_header(skb); + IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); + rcu_read_lock(); + ipv4_update_pmtu(skb, dev_net(skb->dev), + mtu, 0, 0, 0, 0); + rcu_read_unlock(); + /* Client uses PMTUD? */ + if (!(cih->frag_off & htons(IP_DF))) + goto ignore_ipip; + /* Prefer the resulting PMTU */ + if (dest) { + spin_lock(&dest->dst_lock); + if (dest->dst_cache) + mtu = dst_mtu(dest->dst_cache); + spin_unlock(&dest->dst_lock); + } + if (mtu > 68 + sizeof(struct iphdr)) + mtu -= sizeof(struct iphdr); + info = htonl(mtu); + } + /* Strip outer IP, ICMP and IPIP, go to IP header of + * original request. + */ + __skb_pull(skb, offset2); + skb_reset_network_header(skb); + IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, + ic->type, ic->code, ntohl(info)); + icmp_send(skb, ic->type, ic->code, info); + /* ICMP can be shorter but anyways, account it */ + ip_vs_out_stats(cp, skb); + +ignore_ipip: + consume_skb(skb); + verdict = NF_STOLEN; + goto out; + } + /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65b616ae1716..c2275ba048e9 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -49,6 +49,7 @@ enum { IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to * local */ + IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ }; /* @@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +/* Get route to daddr, update *saddr, optionally bind route to saddr */ +static struct rtable *do_output_route4(struct net *net, __be32 daddr, + u32 rtos, int rt_mode, __be32 *saddr) +{ + struct flowi4 fl4; + struct rtable *rt; + int loop = 0; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; + fl4.flowi4_tos = rtos; + +retry: + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + /* Invalid saddr ? */ + if (PTR_ERR(rt) == -EINVAL && *saddr && + rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { + *saddr = 0; + flowi4_update_output(&fl4, 0, rtos, daddr, 0); + goto retry; + } + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); + return NULL; + } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + ip_rt_put(rt); + *saddr = fl4.saddr; + flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + loop++; + goto retry; + } + *saddr = fl4.saddr; + return rt; +} + /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, @@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = dest->addr.ip; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { + rt = do_output_route4(net, dest->addr.ip, rtos, + rt_mode, &dest->dst_saddr.ip); + if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &dest->addr.ip); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - dest->dst_saddr.ip = fl4.saddr; IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " "rtos=%X\n", &dest->addr.ip, &dest->dst_saddr.ip, @@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, *ret_saddr = dest->dst_saddr.ip; spin_unlock(&dest->dst_lock); } else { - struct flowi4 fl4; + __be32 saddr = htonl(INADDR_ANY); - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = daddr; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &daddr); + /* For such unconfigured boxes avoid many route lookups + * for performance reasons because we do not remember saddr + */ + rt_mode &= ~IP_VS_RT_MODE_CONNECT; + rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + if (!rt) return NULL; - } if (ret_saddr) - *ret_saddr = fl4.saddr; + *ret_saddr = saddr; } local = rt->rt_flags & RTCF_LOCAL; @@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) old_dst = dest->dst_cache; dest->dst_cache = NULL; dst_release(old_dst); + dest->dst_saddr.ip = 0; } #define IP_VS_XMIT_TUNNEL(skb, cp) \ @@ -771,7 +800,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; - __be16 df = old_iph->frag_off; + __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -781,7 +810,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { @@ -796,10 +826,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } - if (skb_dst(skb)) + if (rt_is_output_route(skb_rtable(skb))) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - df |= (old_iph->frag_off & htons(IP_DF)); + /* Copy DF, reset fragment offset and MF */ + df = old_iph->frag_off & htons(IP_DF); if ((old_iph->frag_off & htons(IP_DF) && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { -- cgit v1.2.1 From 3654e61137db891f5312e6dd813b961484b5fdf3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 20 Jul 2012 11:59:53 +0300 Subject: ipvs: add pmtu_disc option to disable IP DF for TUN packets Disabling PMTU discovery can increase the output packet rate but some users have enough resources and prefer to fragment than to drop traffic. By default, we copy the DF bit but if pmtu_disc is disabled we do not send FRAG_NEEDED messages anymore. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ net/netfilter/ipvs/ip_vs_xmit.c | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d6d5ccabe067..03d3fc6d9d64 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "pmtu_disc", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3726,6 +3732,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + ipvs->sysctl_pmtu_disc = 1; + tbl[idx++].data = &ipvs->sysctl_pmtu_disc; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c2275ba048e9..543a554008af 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -795,6 +795,7 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -830,10 +831,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); /* Copy DF, reset fragment offset and MF */ - df = old_iph->frag_off & htons(IP_DF); + df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - if ((old_iph->frag_off & htons(IP_DF) && - mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { + if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; -- cgit v1.2.1 From e9324b2ce656e1910d2385b9b47a2f926456dbe3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:45 +0000 Subject: netfilter: nf_ct_sip: fix helper name Commit 3a8fc53a (netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names) introduced a bug in the SIP helper, the helper name is sprinted to the sip_names array instead of instead of into the helper structure. This breaks the helper match and the /proc/net/nf_conntrack_expect output. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 758a1bacc126..2fb666920cc9 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1515,7 +1515,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; -static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1585,9 +1584,9 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].me = THIS_MODULE; if (ports[i] == SIP_PORT) - sprintf(sip_names[i][j], "sip"); + sprintf(sip[i][j].name, "sip"); else - sprintf(sip_names[i][j], "sip-%u", i); + sprintf(sip[i][j].name, "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); -- cgit v1.2.1 From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:46 +0000 Subject: netfilter: nf_ct_sip: fix IPv6 address parsing Within SIP messages IPv6 addresses are enclosed in square brackets in most cases, with the exception of the "received=" header parameter. Currently the helper fails to parse enclosed addresses. This patch: - changes the SIP address parsing function to enforce square brackets when required, and accept them when not required but present, as recommended by RFC 5118. - adds a new SDP address parsing function that never accepts square brackets since SDP doesn't use them. With these changes, the SIP helper correctly parses all test messages from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages for Internet Protocol Version 6 (IPv6)). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 4 +- net/netfilter/nf_conntrack_sip.c | 87 ++++++++++++++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ea4a23813d26..eef8f29e8bf8 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -173,7 +173,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * the reply. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "maddr=", &poff, &plen, - &addr) > 0 && + &addr, true) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", @@ -187,7 +187,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * from which the server received the request. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "received=", &poff, &plen, - &addr) > 0 && + &addr, false) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 2fb666920cc9..5c0a112aeee6 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr, return len + digits_len(ct, dptr, limit, shift); } -static int parse_addr(const struct nf_conn *ct, const char *cp, - const char **endp, union nf_inet_addr *addr, - const char *limit) +static int sip_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit, bool delim) { const char *end; - int ret = 0; + int ret; if (!ct) return 0; @@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp, switch (nf_ct_l3num(ct)) { case AF_INET: ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + if (ret == 0) + return 0; break; case AF_INET6: + if (cp < limit && *cp == '[') + cp++; + else if (delim) + return 0; + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + if (ret == 0) + return 0; + + if (end < limit && *end == ']') + end++; + else if (delim) + return 0; break; default: BUG(); } - if (ret == 0 || end == cp) - return 0; if (endp) *endp = end; return 1; @@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, union nf_inet_addr addr; const char *aux = dptr; - if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { + if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) { pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; dptr += shift; - if (!parse_addr(ct, dptr, &end, addr, limit)) + if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; if (end < limit && *end == ':') { end++; @@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (ret == 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) + if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; if (*c == ':') { c++; @@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, const char *name, unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr) + union nf_inet_addr *addr, bool delim) { const char *limit = dptr + datalen; const char *start, *end; @@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - if (!parse_addr(ct, start, &end, addr, limit)) + if (!sip_parse_addr(ct, start, &end, addr, limit, delim)) return 0; *matchoff = start - dptr; *matchlen = end - start; @@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, return 1; } +static int sdp_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit) +{ + const char *end; + int ret; + + memset(addr, 0, sizeof(*addr)); + switch (nf_ct_l3num(ct)) { + case AF_INET: + ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + break; + case AF_INET6: + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + break; + default: + BUG(); + } + + if (ret == 0) + return 0; + if (endp) + *endp = end; + return 1; +} + +/* skip ip address. returns its length. */ +static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + union nf_inet_addr addr; + const char *aux = dptr; + + if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) { + pr_debug("ip: %s parse failed.!\n", dptr); + return 0; + } + + return dptr - aux; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, */ static const struct sip_header ct_sdp_hdrs[] = { [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), - [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), - [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), - [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), - [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), + [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; @@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, if (ret <= 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, NULL, addr, - dptr + *matchoff + *matchlen)) + if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr, + dptr + *matchoff + *matchlen)) return -1; return 1; } -- cgit v1.2.1 From f22eb25cf5b1157b29ef88c793b71972efc47143 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:47 +0000 Subject: netfilter: nf_nat_sip: fix via header translation with multiple parameters Via-headers are parsed beginning at the first character after the Via-address. When the address is translated first and its length decreases, the offset to start parsing at is incorrect and header parameters might be missed. Update the offset after translating the Via-address to fix this. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index eef8f29e8bf8..4ad9cf173992 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; + unsigned int olen, matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; /* We're only interested in headers related to this @@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, goto next; } + olen = *datalen; if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ -- cgit v1.2.1 From 89c8d91e31f267703e365593f6bfebb9f6d2ad01 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 8 Aug 2012 16:30:13 +0100 Subject: tty: localise the lock The termios and other changes mean the other protections needed on the driver tty arrays should be adequate. Turn it all back on. This contains pieces folded in from the fixes made to the original patches | From: Geert Uytterhoeven (fix m68k) | From: Paul Gortmaker (fix cris) | From: Jiri Kosina (lockdep) | From: Eric Dumazet (lockdep) Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 87ddd051881b..b54c86a2e66b 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -705,9 +705,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } - tty_unlock(); + tty_unlock(tty); schedule(); - tty_lock(); + tty_lock(tty); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); -- cgit v1.2.1 From df32381896f5f0c78a371df2e49ab7c776b1a5ba Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Wed, 8 Aug 2012 07:58:43 +0200 Subject: mac80211: fix unnecessary beacon update after peering status change ieee80211_bss_info_change_notify is called everytime a peer link is established or closed, because the accepting_plinks flag in the meshconf IE *might* have changed. With this patch the corresponding functions return the BSS_CHANGED_BEACON flag when a beacon update is necessary. Also it makes mesh_accept_plinks_update the common place to update the accepting_plinks flag. mesh_accept_plinks_update is called upon plink change and also periodically from ieee80211_mesh_housekeeping. Thus, it also picks up changes of local->num_sta. Signed-off-by: Marco Porsch Acked-by: Thomas Pedersen Signed-off-by: John W. Linville --- net/mac80211/mesh.c | 21 +++++++++++++-------- net/mac80211/mesh.h | 2 +- net/mac80211/mesh_plink.c | 44 +++++++++++++++++++------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6fac18c0423f..856dcf49ce75 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -136,10 +136,13 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) * mesh_accept_plinks_update - update accepting_plink in local mesh beacons * * @sdata: mesh interface in which mesh beacons are going to be updated + * + * Returns: beacon changed flag if the beacon content changed. */ -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) { bool free_plinks; + u32 changed = 0; /* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0, * the mesh interface might be able to establish plinks with peers that @@ -149,8 +152,12 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) */ free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_mesh_housekeeping_timer((unsigned long) sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) { + sdata->u.mesh.accepting_plinks = free_plinks; + changed = BSS_CHANGED_BEACON; + } + + return changed; } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -262,7 +269,6 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) neighbors = (neighbors > 15) ? 15 : neighbors; *pos++ = neighbors << 1; /* Mesh capability */ - ifmsh->accepting_plinks = mesh_plink_availables(sdata); *pos = MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; @@ -521,14 +527,13 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh) { - bool free_plinks; + u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); - free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + changed = mesh_accept_plinks_update(sdata); + ieee80211_bss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index faaa39bcfd10..13fd5b5fdb0a 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -282,7 +282,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index af671b984df3..f20e9f26d137 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -48,17 +48,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *da, __le16 llid, __le16 plid, __le16 reason); static inline -void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } static inline -void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } /** @@ -170,22 +170,21 @@ out: * @sta: mesh peer link to deactivate * * All mesh paths with this peer as next hop will be flushed + * Returns beacon changed flag if the beacon content changed. * * Locking: the caller must hold sta->lock */ -static bool __mesh_plink_deactivate(struct sta_info *sta) +static u32 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated = false; + u32 changed = 0; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - mesh_plink_dec_estab_count(sdata); - deactivated = true; - } + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count(sdata); sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); - return deactivated; + return changed; } /** @@ -198,18 +197,17 @@ static bool __mesh_plink_deactivate(struct sta_info *sta) void mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, @@ -541,15 +539,14 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } @@ -852,9 +849,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; @@ -888,9 +884,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, @@ -908,13 +903,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CLS_ACPT: reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; - __mesh_plink_deactivate(sta); + changed |= __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; -- cgit v1.2.1 From 41f63c5359d14ca995172b8f6eaffd93f60fec54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:47 -0700 Subject: workqueue: use mod_delayed_work() instead of cancel + queue Convert delayed_work users doing cancel_delayed_work() followed by queue_delayed_work() to mod_delayed_work(). Most conversions are straight-forward. Ones worth mentioning are, * drivers/edac/edac_mc.c: edac_mc_workq_setup() converted to always use mod_delayed_work() and cancel loop in edac_mc_reset_delay_period() is dropped. * drivers/platform/x86/thinkpad_acpi.c: No need to remember whether watchdog is active or not. @fan_watchdog_active and related code dropped. * drivers/power/charger-manager.c: Seemingly a lot of delayed_work_pending() abuse going on here. [delayed_]work_pending() are unsynchronized and racy when used like this. I converted one instance in fullbatt_handler(). Please conver the rest so that it invokes workqueue APIs for the intended target state rather than trying to game work item pending state transitions. e.g. if timer should be modified - call mod_delayed_work(), canceled - call cancel_delayed_work[_sync](). * drivers/thermal/thermal_sys.c: thermal_zone_device_set_polling() simplified. Note that round_jiffies() calls in this function are meaningless. round_jiffies() work on absolute jiffies not delta delay used by delayed_work. v2: Tomi pointed out that __cancel_delayed_work() users can't be safely converted to mod_delayed_work(). They could be calling it from irq context and if that happens while delayed_work_timer_fn() is running, it could deadlock. __cancel_delayed_work() users are dropped. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Acked-by: Dmitry Torokhov Acked-by: Anton Vorontsov Acked-by: David Howells Cc: Tomi Valkeinen Cc: Jens Axboe Cc: Jiri Kosina Cc: Doug Thompson Cc: David Airlie Cc: Roland Dreier Cc: "John W. Linville" Cc: Zhang Rui Cc: Len Brown Cc: "J. Bruce Fields" Cc: Johannes Berg --- net/core/dst.c | 4 ++-- net/rfkill/input.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..ed5a0b409aa3 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -214,8 +214,8 @@ void __dst_free(struct dst_entry *dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; - cancel_delayed_work(&dst_gc_work); - schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); + mod_delayed_work(system_wq, &dst_gc_work, + dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); } diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 24c55c53e6a2..c9d931e7ffec 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -164,8 +164,7 @@ static void rfkill_schedule_global_op(enum rfkill_sched_op op) rfkill_op_pending = true; if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { /* bypass the limiter for EPO */ - cancel_delayed_work(&rfkill_op_work); - schedule_delayed_work(&rfkill_op_work, 0); + mod_delayed_work(system_wq, &rfkill_op_work, 0); rfkill_last_scheduled = jiffies; } else rfkill_schedule_ratelimited(); -- cgit v1.2.1 From 734cc1783816ae358cef45673a29bf7af974e147 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Aug 2012 21:47:47 +0200 Subject: TTY: use tty_port_register_device Currently we have no way to assign tty->port while performing tty installation. There are two ways to provide the link tty_struct => tty_port. Either by calling tty_port_install from tty->ops->install or tty_port_register_device called instead of tty_register_device when the device is being set up after connected. In this patch we modify most of the drivers to do the latter. When the drivers use tty_register_device and we have tty_port already, we switch to tty_port_register_device. So we have the tty_struct => tty_port link for free for those. Signed-off-by: Jiri Slaby Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index b54c86a2e66b..18a80b94a8bd 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -278,8 +278,8 @@ out: if (err < 0) goto free; - dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL); - + dev->tty_dev = tty_port_register_device(&dev->port, rfcomm_tty_driver, + dev->id, NULL); if (IS_ERR(dev->tty_dev)) { err = PTR_ERR(dev->tty_dev); list_del(&dev->list); -- cgit v1.2.1 From 9c650ffc27b444f5370ae5e8bc84df76584416a0 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Aug 2012 21:48:02 +0200 Subject: TTY: ircomm_tty, add tty install This has two outcomes: * we give the TTY layer a tty_port * we do not find the info structure every time open is called on that tty Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 7a0d6115d06f..96689906b93c 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -52,6 +52,8 @@ #include #include +static int ircomm_tty_install(struct tty_driver *driver, + struct tty_struct *tty); static int ircomm_tty_open(struct tty_struct *tty, struct file *filp); static void ircomm_tty_close(struct tty_struct * tty, struct file *filp); static int ircomm_tty_write(struct tty_struct * tty, @@ -82,6 +84,7 @@ static struct tty_driver *driver; static hashbin_t *ircomm_tty = NULL; static const struct tty_operations ops = { + .install = ircomm_tty_install, .open = ircomm_tty_open, .close = ircomm_tty_close, .write = ircomm_tty_write, @@ -374,21 +377,11 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, return retval; } -/* - * Function ircomm_tty_open (tty, filp) - * - * This routine is called when a particular tty device is opened. This - * routine is mandatory; if this routine is not filled in, the attempted - * open will fail with ENODEV. - */ -static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) + +static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct ircomm_tty_cb *self; unsigned int line = tty->index; - unsigned long flags; - int ret; - - IRDA_DEBUG(2, "%s()\n", __func__ ); /* Check if instance already exists */ self = hashbin_lock_find(ircomm_tty, line, NULL); @@ -425,14 +418,30 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) tty->termios.c_oflag = 0; /* Insert into hash */ - /* FIXME there is a window from find to here */ hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL); } + + return tty_port_install(&self->port, driver, tty); +} + +/* + * Function ircomm_tty_open (tty, filp) + * + * This routine is called when a particular tty device is opened. This + * routine is mandatory; if this routine is not filled in, the attempted + * open will fail with ENODEV. + */ +static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) +{ + struct ircomm_tty_cb *self = tty->driver_data; + unsigned long flags; + int ret; + + IRDA_DEBUG(2, "%s()\n", __func__ ); + /* ++ is not atomic, so this should be protected - Jean II */ spin_lock_irqsave(&self->port.lock, flags); self->port.count++; - - tty->driver_data = self; spin_unlock_irqrestore(&self->port.lock, flags); tty_port_tty_set(&self->port, tty); @@ -472,7 +481,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) } /* Check if this is a "normal" ircomm device, or an irlpt device */ - if (line < 0x10) { + if (self->line < 0x10) { self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE; self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */ /* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */ -- cgit v1.2.1 From 19e303d67dc2e68a7f14b0baf7949195d7327145 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 28 Jul 2012 14:45:50 +0000 Subject: netfilter: PTR_RET can be used This quiets the coccinelle warnings: net/bridge/netfilter/ebtable_filter.c:107:1-3: WARNING: PTR_RET can be used net/bridge/netfilter/ebtable_nat.c:107:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_filter.c:65:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_mangle.c:100:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_raw.c:44:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_security.c:62:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_filter.c:72:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_mangle.c:107:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_raw.c:51:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_security.c:70:1-3: WARNING: PTR_RET can be used Signed-off-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_filter.c | 4 +--- net/bridge/netfilter/ebtable_nat.c | 4 +--- net/ipv4/netfilter/iptable_filter.c | 4 +--- net/ipv4/netfilter/iptable_mangle.c | 4 +--- net/ipv4/netfilter/iptable_raw.c | 4 +--- net/ipv4/netfilter/iptable_security.c | 5 +---- net/ipv6/netfilter/ip6table_filter.c | 4 +--- net/ipv6/netfilter/ip6table_mangle.c | 4 +--- net/ipv6/netfilter/ip6table_raw.c | 4 +--- net/ipv6/netfilter/ip6table_security.c | 5 +---- 10 files changed, 10 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 42e6bd094574..3c2e9dced9e0 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { net->xt.frame_filter = ebt_register_table(net, &frame_filter); - if (IS_ERR(net->xt.frame_filter)) - return PTR_ERR(net->xt.frame_filter); - return 0; + return PTR_RET(net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 6dc2f878ae05..10871bc77908 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { net->xt.frame_nat = ebt_register_table(net, &frame_nat); - if (IS_ERR(net->xt.frame_nat)) - return PTR_ERR(net->xt.frame_nat); - return 0; + return PTR_RET(net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 851acec852d2..d20cc374025c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net) net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_filter)) - return PTR_ERR(net->ipv4.iptable_filter); - return 0; + return PTR_RET(net->ipv4.iptable_filter); } static void __net_exit iptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aef5d1fbe77d..f38b94288cf5 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) net->ipv4.iptable_mangle = ipt_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_mangle)) - return PTR_ERR(net->ipv4.iptable_mangle); - return 0; + return PTR_RET(net->ipv4.iptable_mangle); } static void __net_exit iptable_mangle_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 07fb710cd722..b21e2191e2f5 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net) net->ipv4.iptable_raw = ipt_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_raw)) - return PTR_ERR(net->ipv4.iptable_raw); - return 0; + return PTR_RET(net->ipv4.iptable_raw); } static void __net_exit iptable_raw_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index be45bdc4c602..b283d8e2601a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net) net->ipv4.iptable_security = ipt_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_security)) - return PTR_ERR(net->ipv4.iptable_security); - - return 0; + return PTR_RET(net->ipv4.iptable_security); } static void __net_exit iptable_security_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 325e59a0224f..beb5777d2043 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) net->ipv6.ip6table_filter = ip6t_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_filter)) - return PTR_ERR(net->ipv6.ip6table_filter); - return 0; + return PTR_RET(net->ipv6.ip6table_filter); } static void __net_exit ip6table_filter_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 4d782405f125..7431121b87de 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) net->ipv6.ip6table_mangle = ip6t_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_mangle)) - return PTR_ERR(net->ipv6.ip6table_mangle); - return 0; + return PTR_RET(net->ipv6.ip6table_mangle); } static void __net_exit ip6table_mangle_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5b9926a011bd..60d1bddff7a0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) net->ipv6.ip6table_raw = ip6t_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_raw)) - return PTR_ERR(net->ipv6.ip6table_raw); - return 0; + return PTR_RET(net->ipv6.ip6table_raw); } static void __net_exit ip6table_raw_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 91aa2b4d83c9..db155351339c 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net) net->ipv6.ip6table_security = ip6t_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_security)) - return PTR_ERR(net->ipv6.ip6table_security); - - return 0; + return PTR_RET(net->ipv6.ip6table_security); } static void __net_exit ip6table_security_net_exit(struct net *net) -- cgit v1.2.1 From 68e035c950dbceaf660144bf74054dfdfb6aad15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Aug 2012 12:47:37 +0200 Subject: netfilter: ctnetlink: fix missing locking while changing conntrack from nfqueue Since 9cb017665 netfilter: add glue code to integrate nfnetlink_queue and ctnetlink, we can modify the conntrack entry via nfnl_queue. However, the change of the conntrack entry via nfnetlink_queue requires appropriate locking to avoid concurrent updates. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 14f67a2cbcb5..da4fc37a8578 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1896,10 +1896,15 @@ static int ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; + int ret; nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); - return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_lock_bh(&nf_conntrack_lock); + ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_unlock_bh(&nf_conntrack_lock); + + return ret; } static struct nfq_ct_hook ctnetlink_nfqueue_hook = { -- cgit v1.2.1 From ee89bab14e857678f83a71ee99e575b0fdbb58d4 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 9 Aug 2012 22:14:56 +0000 Subject: net: move and rename netif_notify_peers() I believe net/core/dev.c is a better place for netif_notify_peers(), because other net event notify functions also stay in this file. And rename it to netdev_notify_peers(). Cc: David S. Miller Cc: Ian Campbell Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev.c | 18 ++++++++++++++++++ net/sched/sch_generic.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1f06df8d10a3..5defcf940005 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1106,6 +1106,24 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); +/** + * netdev_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void netdev_notify_peers(struct net_device *dev) +{ + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); +} +EXPORT_SYMBOL(netdev_notify_peers); + int netdev_bonding_change(struct net_device *dev, unsigned long event) { return call_netdevice_notifiers(event, dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 511323e89cec..6c4d5fe53ce8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); -/** - * netif_notify_peers - notify network peers about existence of @dev - * @dev: network device - * - * Generate traffic such that interested network peers are aware of - * @dev, such as by generating a gratuitous ARP. This may be used when - * a device wants to inform the rest of the network about some sort of - * reconfiguration such as a failover event or virtual machine - * migration. - */ -void netif_notify_peers(struct net_device *dev) -{ - rtnl_lock(); - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); - rtnl_unlock(); -} -EXPORT_SYMBOL(netif_notify_peers); - /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. -- cgit v1.2.1 From b7bc2a5b5bd99b216c3e5fe68c7f45c684ab5745 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 9 Aug 2012 22:14:57 +0000 Subject: net: remove netdev_bonding_change() I don't see any benifits to use netdev_bonding_change() than using call_netdevice_notifiers() directly. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5defcf940005..ce1bccb08de5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1124,12 +1124,6 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); -int netdev_bonding_change(struct net_device *dev, unsigned long event) -{ - return call_netdevice_notifiers(event, dev); -} -EXPORT_SYMBOL(netdev_bonding_change); - /** * dev_load - load a network module * @net: the applicable net namespace -- cgit v1.2.1 From c12b395a46646bab69089ce7016ac78177f6001f Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Fri, 10 Aug 2012 00:51:50 +0000 Subject: gre: Support GRE over IPv6 GRE over IPv6 implementation. Signed-off-by: Dmitry Kozlov Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 16 + net/ipv6/Makefile | 1 + net/ipv6/ip6_gre.c | 1790 +++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_tunnel.c | 89 ++- 4 files changed, 1872 insertions(+), 24 deletions(-) create mode 100644 net/ipv6/ip6_gre.c (limited to 'net') diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5728695b5449..4f7fe7270e37 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -201,6 +201,22 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_GRE + tristate "IPv6: GRE tunnel" + select IPV6_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This particular tunneling driver implements + GRE (Generic Routing Encapsulation) and at this time allows + encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure. + This driver is useful if the other endpoint is a Cisco router: Cisco + likes GRE much better than the other Linux tunneling driver ("IP + tunneling" above). In addition, GRE allows multicast redistribution + through the tunnel. + + Saying M here will produce a module called ip6_gre. If unsure, say N. + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac1..b6d3f79151e2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o +obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c new file mode 100644 index 000000000000..a84ad5dc4fcf --- /dev/null +++ b/net/ipv6/ip6_gre.c @@ -0,0 +1,1790 @@ +/* + * GRE over IPv6 protocol decoder. + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) +#define IPV6_TCLASS_SHIFT 20 + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static int ip6gre_net_id __read_mostly; +struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + + struct net_device *fb_tunnel_dev; +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static int ip6gre_tunnel_init(struct net_device *dev); +static void ip6gre_tunnel_setup(struct net_device *dev); +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +static u32 HASH_ADDR(const struct in6_addr *addr) +{ + u32 hash = ipv6_addr_hash(addr); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +#define tunnels_r_l tunnels[3] +#define tunnels_r tunnels[2] +#define tunnels_l tunnels[1] +#define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and RTNL + */ + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_tstats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; + +static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; +} + +/* Given src, dst and key, find appropriate for input tunnel. */ + +static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, + const struct in6_addr *remote, const struct in6_addr *local, + __be32 key, __be16 gre_proto) +{ + struct net *net = dev_net(dev); + int link = dev->ifindex; + unsigned int h0 = HASH_ADDR(remote); + unsigned int h1 = HASH_KEY(key); + struct ip6_tnl *t, *cand = NULL; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; + + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { + if ((!ipv6_addr_equal(local, &t->parms.laddr) && + (!ipv6_addr_equal(local, &t->parms.raddr) || + !ipv6_addr_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + if (cand != NULL) + return cand; + + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); + + return NULL; +} + +static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, + const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = HASH_KEY(p->i_key); + int prio = 0; + + if (!ipv6_addr_any(local)) + prio |= 1; + if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { + prio |= 2; + h ^= HASH_ADDR(remote); + } + + return &ign->tunnels[prio][h]; +} + +static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) +{ + return __ip6gre_bucket(ign, &t->parms); +} + +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, + const struct __ip6_tnl_parm *parms, + int type) +{ + const struct in6_addr *remote = &parms->raddr; + const struct in6_addr *local = &parms->laddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip6_tnl *t; + struct ip6_tnl __rcu **tp; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + for (tp = __ip6gre_bucket(ign, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + + return t; +} + +static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, + const struct __ip6_tnl_parm *parms, int create) +{ + struct ip6_tnl *t, *nt; + struct net_device *dev; + char name[IFNAMSIZ]; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t || !create) + return t; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else + strcpy(name, "ip6gre%d"); + + dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + if (!dev) + return NULL; + + dev_net_set(dev, net); + + nt = netdev_priv(dev); + nt->parms = *parms; + dev->rtnl_link_ops = &ip6gre_link_ops; + + nt->dev = dev; + ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + return nt; + +failed_free: + free_netdev(dev); + return NULL; +} + +static void ip6gre_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + dev_put(dev); +} + + +static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + __be16 *p = (__be16 *)(ipv6h + 1); + int grehlen = sizeof(ipv6h) + 4; + struct ip6_tnl *t; + __be16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (skb_headlen(skb) < grehlen) + return; + + rcu_read_lock(); + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); + if (t == NULL) + goto out; + + switch (type) { + __u32 teli; + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 mtu; + case ICMPV6_DEST_UNREACH: + net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); + break; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); + } + break; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); + + if (teli && teli == info - 2) { + tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; + if (tel->encap_limit == 0) { + net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); + } + } else { + net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } + break; + case ICMPV6_PKT_TOOBIG: + mtu = info - offset; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; + break; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; +out: + rcu_read_unlock(); +} + +static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; + + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); + + if (INET_ECN_is_ce(dsfield)) + IP_ECN_set_ce(ip_hdr(skb)); +} + +static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); + + if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) + IP6_ECN_set_ce(ipv6_hdr(skb)); +} + +static int ip6gre_rcv(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + u8 *h; + __be16 flags; + __sum16 csum = 0; + __be32 key = 0; + u32 seqno = 0; + struct ip6_tnl *tunnel; + int offset = 4; + __be16 gre_proto; + + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + goto drop_nolock; + + ipv6h = ipv6_hdr(skb); + h = skb->data; + flags = *(__be16 *)h; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop_nolock; + + if (flags&GRE_CSUM) { + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + if (!csum) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + } + offset += 4; + } + if (flags&GRE_KEY) { + key = *(__be32 *)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(__be32 *)(h + offset)); + offset += 4; + } + } + + gre_proto = *(__be16 *)(h + 2); + + rcu_read_lock(); + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, key, + gre_proto); + if (tunnel) { + struct pcpu_tstats *tstats; + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { + tunnel->dev->stats.rx_dropped++; + goto drop; + } + + secpath_reset(skb); + + skb->protocol = gre_proto; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { + skb->protocol = htons(ETH_P_IP); + if ((*(h + offset) & 0xF0) != 0x40) + offset += 4; + } + + skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); + skb->pkt_type = PACKET_HOST; + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && + (s32)(seqno - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + if (skb->protocol == htons(ETH_P_IP)) + ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb); + + netif_rx(skb); + + rcu_read_unlock(); + return 0; + } + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + rcu_read_unlock(); +drop_nolock: + kfree_skb(skb); + return 0; +} + +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; + +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); + + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; + + opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; +} + +static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, + struct net_device *dev, + __u8 dsfield, + struct flowi6 *fl6, + int encap_limit, + __u32 *pmtu) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *tunnel = netdev_priv(dev); + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *ipv6h; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int gre_hlen; + struct ipv6_tel_txoption opt; + int mtu; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device_stats *stats = &tunnel->dev->stats; + int err = -1; + u8 proto; + int pkt_len; + struct sk_buff *new_skb; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { + gre_hlen = 0; + ipv6h = (struct ipv6hdr *)skb->data; + fl6->daddr = ipv6h->daddr; + } else { + gre_hlen = tunnel->hlen; + fl6->daddr = tunnel->parms.raddr; + } + + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(tunnel); + + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto tx_err_link_failure; + } + dst = ndst; + } + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + tunnel->parms.name); + goto tx_err_dst_release; + } + + mtu = dst_mtu(dst) - sizeof(*ipv6h); + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; + } + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->len > mtu) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; + } + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + if (!new_skb) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + consume_skb(skb); + skb = new_skb; + } + + skb_dst_drop(skb); + + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } + + skb->transport_header = skb->network_header; + + proto = NEXTHDR_GRE; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + } + + skb_push(skb, gre_hlen); + skb_reset_network_header(skb); + + /* + * Push down and install the IP header. + */ + ipv6h = ipv6_hdr(skb); + *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); + dsfield = INET_ECN_encapsulate(0, dsfield); + ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ipv6h->hop_limit = tunnel->parms.hop_limit; + ipv6h->nexthdr = proto; + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; + + ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), + skb->len - sizeof(struct ipv6hdr)); + } + } + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); + + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } + + if (ndst) + ip6_tnl_dst_store(tunnel, ndst); + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + const struct iphdr *iph = ip_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + dsfield = ipv4_get_dsfield(iph); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + return -1; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, + const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = skb->protocol; + + err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + + return err; +} + +static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + if (!ip6_tnl_xmit_ctl(t)) + return -1; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip6gre_xmit_ipv4(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6gre_xmit_ipv6(skb, dev); + break; + default: + ret = ip6gre_xmit_other(skb, dev); + break; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int addend = sizeof(struct ipv6hdr) + 4; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + } + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && + p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + /* Precalculate GRE options length */ + if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (t->parms.o_flags&GRE_CSUM) + addend += 4; + if (t->parms.o_flags&GRE_KEY) + addend += 4; + if (t->parms.o_flags&GRE_SEQ) + addend += 4; + } + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(dev_net(dev), + &p->raddr, &p->laddr, + p->link, strict); + + if (rt == NULL) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + + if (set_mtu) { + dev->mtu = rt->dst.dev->mtu - addend; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + } + dst_release(&rt->dst); + } + + t->hlen = addend; +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + ip6_tnl_dst_reset(t); + ip6gre_tnl_link_config(t, set_mtu); + return 0; +} + +static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, + const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->i_flags = u->i_flags; + p->o_flags = u->o_flags; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, + const struct __ip6_tnl_parm *p) +{ + u->proto = IPPROTO_GRE; + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->i_flags = p->i_flags; + u->o_flags = p->o_flags; + memcpy(u->name, p->name, sizeof(u->name)); +} + +static int ip6gre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == ign->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + } + if (t == NULL) + t = netdev_priv(dev); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) + goto done; + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); + + if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + t = netdev_priv(dev); + + ip6gre_tunnel_unlink(ign, t); + synchronize_net(); + ip6gre_tnl_change(t, &p1, 1); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == ign->fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(ign->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} + +static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned int len) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); + __be16 *p = (__be16 *)(ipv6h+1); + + *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. + */ + + if (saddr) + memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); + if (daddr) + memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); + if (!ipv6_addr_any(&ipv6h->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb); + memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr)); + return sizeof(struct in6_addr); +} + +static const struct header_ops ip6gre_header_ops = { + .create = ip6gre_header, + .parse = ip6gre_header_parse, +}; + +static const struct net_device_ops ip6gre_netdev_ops = { + .ndo_init = ip6gre_tunnel_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static void ip6gre_tunnel_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &ip6gre_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->type = ARPHRD_IP6GRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->flags |= IFF_NOARP; + dev->iflink = 0; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); + + if (ipv6_addr_any(&tunnel->parms.raddr)) + dev->header_ops = &ip6gre_header_ops; + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void ip6gre_fb_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + tunnel->hlen = sizeof(struct ipv6hdr) + 4; + + dev_hold(dev); +} + + +static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = ip6gre_rcv, + .err_handler = ip6gre_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, + struct list_head *head) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip6_tnl *t; + + t = rtnl_dereference(ign->tunnels[prio][h]); + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = rtnl_dereference(t->next); + } + } + } +} + +static int __net_init ip6gre_init_net(struct net *net) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + ip6gre_tunnel_setup); + if (!ign->fb_tunnel_dev) { + err = -ENOMEM; + goto err_alloc_dev; + } + dev_net_set(ign->fb_tunnel_dev, net); + + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); + ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; + + err = register_netdev(ign->fb_tunnel_dev); + if (err) + goto err_reg_dev; + + rcu_assign_pointer(ign->tunnels_wc[0], + netdev_priv(ign->fb_tunnel_dev)); + return 0; + +err_reg_dev: + ip6gre_dev_free(ign->fb_tunnel_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6gre_exit_net(struct net *net) +{ + struct ip6gre_net *ign; + LIST_HEAD(list); + + ign = net_generic(net, ip6gre_net_id); + rtnl_lock(); + ip6gre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations ip6gre_net_ops = { + .init = ip6gre_init_net, + .exit = ip6gre_exit_net, + .id = &ip6gre_net_id, + .size = sizeof(struct ip6gre_net), +}; + +static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct in6_addr daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + if (ipv6_addr_any(&daddr)) + return -EINVAL; + } + +out: + return ip6gre_tunnel_validate(tb, data); +} + + +static void ip6gre_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); + + if (data[IFLA_GRE_FLOWINFO]) + parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + + if (data[IFLA_GRE_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); +} + +static int ip6gre_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ip6gre_tnl_link_config(tunnel, 1); + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static const struct net_device_ops ip6gre_tap_netdev_ops = { + .ndo_init = ip6gre_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->netdev_ops = &ip6gre_tap_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ip6_tnl *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &nt->parms); + + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + nt->dev = dev; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t, *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct __ip6_tnl_parm p; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &p); + + t = ip6gre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + return 0; +} + +static size_t ip6gre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_GRE_FLOWINFO */ + nla_total_size(4) + + /* IFLA_GRE_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) || + nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || + /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ + nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || + nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tunnel_setup, + .validate = ip6gre_tunnel_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { + .kind = "ip6gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tap_setup, + .validate = ip6gre_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +/* + * And now the modules code and kernel interface. + */ + +static int __init ip6gre_init(void) +{ + int err; + + pr_info("GRE over IPv6 tunneling driver\n"); + + err = register_pernet_device(&ip6gre_net_ops); + if (err < 0) + return err; + + err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); + if (err < 0) { + pr_info("%s: can't add protocol\n", __func__); + goto add_proto_failed; + } + + err = rtnl_link_register(&ip6gre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ip6gre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: + return err; + +tap_ops_failed: + rtnl_link_unregister(&ip6gre_link_ops); +rtnl_link_failed: + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ip6gre_net_ops); + goto out; +} + +static void __exit ip6gre_fini(void) +{ + rtnl_link_unregister(&ip6gre_tap_ops); + rtnl_link_unregister(&ip6gre_link_ops); + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); + unregister_pernet_device(&ip6gre_net_ops); +} + +module_init(ip6gre_init); +module_exit(ip6gre_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); +MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9a1d5fe6aef8..33d2a0e6712d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { struct dst_entry *dst = t->dst_cache; @@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) return dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); -static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) +void ip6_tnl_dst_reset(struct ip6_tnl *t) { dst_release(t->dst_cache); t->dst_cache = NULL; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; dst_release(t->dst_cache); t->dst_cache = dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ **/ static struct ip6_tnl __rcu ** -ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) +ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev) * created tunnel or NULL **/ -static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) +static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; @@ -322,7 +325,7 @@ failed: **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, - struct ip6_tnl_parm *p, int create) + struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) * else index to encapsulation limit **/ -static __u16 -parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) +__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; @@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) } return 0; } +EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /** * ip6_tnl_err - tunnel error handler @@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, case ICMPV6_PARAMPROB: teli = 0; if ((*code) == ICMPV6_HDR_FIELD) - teli = parse_tlv_tnl_enc_lim(skb, skb->data); + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; @@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } -static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, +__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; @@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, } return flags; } +EXPORT_SYMBOL(ip6_tnl_get_cap); /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, +int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); /** * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally @@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); + /** * ip6_tnl_xmit2 - encapsulate packet and send * @skb: the outgoing socket buffer @@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@ -1152,7 +1159,7 @@ tx_err: static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) **/ static int -ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) +ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) return 0; } +static void +ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->proto = u->proto; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->proto = p->proto; + memcpy(u->name, p->name, sizeof(u->name)); +} + /** * ip6_tnl_ioctl - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel @@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip6_tnl_parm p; + struct __ip6_tnl_parm p1; struct ip6_tnl *t = NULL; struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -1274,11 +1310,12 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; break; } - t = ip6_tnl_locate(net, &p, 0); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof (p)); + ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; } @@ -1295,7 +1332,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && p.proto != 0) break; - t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { @@ -1307,13 +1345,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_unlink(ip6n, t); synchronize_net(); - err = ip6_tnl_change(t, &p); + err = ip6_tnl_change(t, &p1); ip6_tnl_link(ip6n, t); netdev_state_change(dev); } if (t) { err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; } else @@ -1329,7 +1368,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; err = -ENOENT; - if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) -- cgit v1.2.1 From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:37 +0000 Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and __netpoll_setup() slave_enable_netpoll() and __netpoll_setup() may be called with read_lock() held, so should use GFP_ATOMIC to allocate memory. Eric suggested to pass gfp flags to __netpoll_setup(). Cc: Eric Dumazet Cc: "David S. Miller" Reported-by: Dan Carpenter Signed-off-by: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 ++++--- net/bridge/br_device.c | 12 ++++++------ net/bridge/br_if.c | 2 +- net/bridge/br_private.h | 4 ++-- net/core/netpoll.c | 8 ++++---- 5 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 73a2a83ee2da..ee4ae0944cef 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -669,19 +669,20 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, + gfp_t gfp) { struct vlan_dev_priv *info = vlan_dev_priv(dev); struct net_device *real_dev = info->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + netpoll = kzalloc(sizeof(*netpoll), gfp); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev); + err = __netpoll_setup(netpoll, real_dev, gfp); if (err) { kfree(netpoll); goto out; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 333484537600..ed0e0f9dc788 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -213,7 +213,8 @@ static void br_netpoll_cleanup(struct net_device *dev) } } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, + gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; @@ -222,8 +223,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) list_for_each_entry_safe(p, n, &br->port_list, list) { if (!p->dev) continue; - - err = br_netpoll_enable(p); + err = br_netpoll_enable(p, gfp); if (err) goto fail; } @@ -236,17 +236,17 @@ fail: goto out; } -int br_netpoll_enable(struct net_bridge_port *p) +int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { struct netpoll *np; int err = 0; - np = kzalloc(sizeof(*p->np), GFP_KERNEL); + np = kzalloc(sizeof(*p->np), gfp); err = -ENOMEM; if (!np) goto out; - err = __netpoll_setup(np, p->dev); + err = __netpoll_setup(np, p->dev, gfp); if (err) { kfree(np); goto out; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e1144e1617be..171fd6b9bfe6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; err = netdev_set_master(dev, br->dev); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a768b2408edf..f507d2af9646 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -extern int br_netpoll_enable(struct net_bridge_port *p); +extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); extern void br_netpoll_disable(struct net_bridge_port *p); #else static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) @@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p) +static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b4c90e42b443..37cc854774a4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -715,7 +715,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) { struct netpoll_info *npinfo; const struct net_device_ops *ops; @@ -734,7 +734,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + npinfo = kmalloc(sizeof(*npinfo), gfp); if (!npinfo) { err = -ENOMEM; goto out; @@ -752,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); if (err) goto free_npinfo; } @@ -857,7 +857,7 @@ int netpoll_setup(struct netpoll *np) refill_skbs(); rtnl_lock(); - err = __netpoll_setup(np, ndev); + err = __netpoll_setup(np, ndev, GFP_KERNEL); rtnl_unlock(); if (err) -- cgit v1.2.1 From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:38 +0000 Subject: netpoll: make __netpoll_cleanup non-block Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup() may be called with read_lock() held too, so we should make them non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 6 +----- net/bridge/br_device.c | 6 +----- net/core/netpoll.c | 42 ++++++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ee4ae0944cef..b65623f90660 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -704,11 +704,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) info->netpoll = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - - __netpoll_cleanup(netpoll); - kfree(netpoll); + __netpoll_free_rcu(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ed0e0f9dc788..f41ba4048c9a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - - __netpoll_cleanup(np); - kfree(np); + __netpoll_free_rcu(np); } #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 37cc854774a4..dc17f1db1479 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -878,6 +878,24 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); +static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) +{ + struct netpoll_info *npinfo = + container_of(rcu_head, struct netpoll_info, rcu); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + + /* we can't call cancel_delayed_work_sync here, as we are in softirq */ + cancel_delayed_work(&npinfo->tx_work); + + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + /* now cancel it again */ + cancel_delayed_work(&npinfo->tx_work); + kfree(npinfo); +} + void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -903,20 +921,24 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); + call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu_bh(); +static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +{ + struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_delayed_work_sync(&npinfo->tx_work); + __netpoll_cleanup(np); + kfree(np); +} - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - } +void __netpoll_free_rcu(struct netpoll *np) +{ + call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); } -EXPORT_SYMBOL_GPL(__netpoll_cleanup); +EXPORT_SYMBOL_GPL(__netpoll_free_rcu); void netpoll_cleanup(struct netpoll *np) { -- cgit v1.2.1 From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:40 +0000 Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx() In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(), this patch fixes it by using the 'npinfo' passed from netpoll_rx() where it is already dereferenced with rcu_dereference_bh(). Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index dc17f1db1479..d055bb01328b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -543,13 +543,12 @@ static void arp_reply(struct sk_buff *skb) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -int __netpoll_rx(struct sk_buff *skb) +int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { int proto, len, ulen; int hits = 0; const struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npinfo = skb->dev->npinfo; struct netpoll *np, *tmp; if (list_empty(&npinfo->rx_np)) -- cgit v1.2.1 From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:42 +0000 Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev() This patch fixes several problems in the call path of netpoll_send_skb_on_dev(): 1. Disable IRQ's before calling netpoll_send_skb_on_dev(). 2. All the callees of netpoll_send_skb_on_dev() should use rcu_dereference_bh() to dereference ->npinfo. 3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d055bb01328b..174346ac15a0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -54,7 +54,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void arp_reply(struct sk_buff *skb); +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -170,7 +170,8 @@ static void poll_napi(struct net_device *dev) list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(dev->npinfo, napi, budget); + budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), + napi, budget); spin_unlock(&napi->poll_lock); if (!budget) @@ -185,13 +186,14 @@ static void service_arp_queue(struct netpoll_info *npi) struct sk_buff *skb; while ((skb = skb_dequeue(&npi->arp_tx))) - arp_reply(skb); + netpoll_arp_reply(skb, npi); } } static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; + struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); if (!dev || !netif_running(dev)) return; @@ -206,17 +208,18 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); if (dev->flags & IFF_SLAVE) { - if (dev->npinfo) { + if (ni) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; - while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->arp_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + skb_queue_tail(&bond_ni->arp_tx, skb); } } } - service_arp_queue(dev->npinfo); + service_arp_queue(ni); zap_completion_queue(); } @@ -302,6 +305,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } +/* call with IRQ disabled */ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev) { @@ -309,8 +313,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, unsigned long tries; const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ - struct netpoll_info *npinfo = np->dev->npinfo; + struct netpoll_info *npinfo; + + WARN_ON_ONCE(!irqs_disabled()); + npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { __kfree_skb(skb); return; @@ -319,11 +326,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - unsigned long flags; txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { @@ -347,10 +352,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } WARN_ONCE(!irqs_disabled(), - "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", dev->name, ops->ndo_start_xmit); - local_irq_restore(flags); } if (status != NETDEV_TX_OK) { @@ -423,9 +427,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void arp_reply(struct sk_buff *skb) +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; -- cgit v1.2.1 From d30362c0712eb567334b3b66de7c40d4372f2c6f Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:43 +0000 Subject: bridge: add some comments for NETDEV_RELEASE Add comments on why we don't notify NETDEV_RELEASE. Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_if.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 171fd6b9bfe6..1c8fdc3558cd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ del_nbp(p); spin_lock_bh(&br->lock); -- cgit v1.2.1 From 4e3828c4bfd90b00a951cad7c8da27d1966beefe Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:44 +0000 Subject: bridge: use list_for_each_entry() in netpoll functions We don't delete 'p' from the list in the loop, so we can just use list_for_each_entry(). Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_device.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f41ba4048c9a..32211fa5b506 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -206,21 +206,20 @@ static void br_poll_controller(struct net_device *br_dev) static void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) br_netpoll_disable(p); - } } static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; int err = 0; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; err = br_netpoll_enable(p, gfp); -- cgit v1.2.1 From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:45 +0000 Subject: netpoll: check netpoll tx status on the right device Although this doesn't matter actually, because netpoll_tx_running() doesn't use the parameter, the code will be more readable. For team_dev_queue_xmit() we have to move it down to avoid compile errors. Cc: David Miller Signed-off-by: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e9466d412707..02015a505d2a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - if (unlikely(netpoll_tx_running(to->dev))) { + if (unlikely(netpoll_tx_running(to->br->dev))) { if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { -- cgit v1.2.1 From f3da38932b46d1bf171634cc7aae3da405eaf7a6 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:47 +0000 Subject: vlan: clean up some variable names To be consistent, s/info/vlan/. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b65623f90660..0347d48aa7c9 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -672,8 +672,8 @@ static void vlan_dev_poll_controller(struct net_device *dev) static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, gfp_t gfp) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct net_device *real_dev = info->real_dev; + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; @@ -688,7 +688,7 @@ static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *n goto out; } - info->netpoll = netpoll; + vlan->netpoll = netpoll; out: return err; @@ -696,13 +696,13 @@ out: static void vlan_dev_netpoll_cleanup(struct net_device *dev) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct netpoll *netpoll = info->netpoll; + struct vlan_dev_priv *vlan= vlan_dev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; if (!netpoll) return; - info->netpoll = NULL; + vlan->netpoll = NULL; __netpoll_free_rcu(netpoll); } -- cgit v1.2.1 From 6eacf8ad8d01c49b95b994b0bf379db2b5b29460 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:48 +0000 Subject: vlan: clean up vlan_dev_hard_start_xmit() Clean up vlan_dev_hard_start_xmit() function. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 0347d48aa7c9..402442402af7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return rc; } +static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { + vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; - vlan_tci = vlan_dev_priv(dev)->vlan_id; + vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb->dev = vlan_dev_priv(dev)->real_dev; + skb->dev = vlan->real_dev; len = skb->len; - if (netpoll_tx_running(dev)) - return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); + if (unlikely(netpoll_tx_running(dev))) + return vlan_netpoll_send_skb(vlan, skb); + ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; - stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); + stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { - this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); + this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); } return ret; -- cgit v1.2.1 From 689971b44613883ee74ae9c1b31a864aaa3a8e17 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:49 +0000 Subject: netpoll: handle vlan tags in netpoll tx and rx path Without this patch, I can't get netconsole logs remotely over vlan. The reason is probably we don't handle vlan tags in either netpoll tx or rx path. I am not sure if I use these vlan functions correctly, at least this patch works. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 174346ac15a0..e4ba3e70c174 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -334,6 +335,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { + if (vlan_tx_tag_present(skb) && + !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { + skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + break; + skb->vlan_tci = 0; + } + status = ops->ndo_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); @@ -567,6 +576,12 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) return 1; } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + proto = ntohs(eth_hdr(skb)->h_proto); if (proto != ETH_P_IP) goto out; -- cgit v1.2.1 From 6bdb7fe31046ac50b47e83c35cd6c6b6160a475d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:50 +0000 Subject: netpoll: re-enable irq in poll_napi() napi->poll() needs IRQ enabled, so we have to re-enable IRQ before calling it. Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e4ba3e70c174..346b1eb83a1f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -168,16 +168,24 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int budget = 16; + WARN_ON_ONCE(!irqs_disabled()); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + local_irq_enable(); if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { + rcu_read_lock_bh(); budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), napi, budget); + rcu_read_unlock_bh(); spin_unlock(&napi->poll_lock); - if (!budget) + if (!budget) { + local_irq_disable(); break; + } } + local_irq_disable(); } } -- cgit v1.2.1 From 7bd86cc282a458b66c41e3f6676de6656c99b8db Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 12 Aug 2012 20:09:59 +0000 Subject: ipv4: Cache local output routes Commit caacf05e5ad1abf causes big drop of UDP loop back performance. The cause of the regression is that we do not cache the local output routes. Each time we send a datagram from unconnected UDP socket, the kernel allocates a dst_entry and adds it to the rt_uncached_list. It creates lock contention on the rt_uncached_lock. Reported-by: Alex Shi Signed-off-by: Yan, Zheng Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ba974f143c..fd9ecb52c66b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2028,7 +2028,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } dev_out = net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; - res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } -- cgit v1.2.1 From 4855d6f3116e891b66198838b683dce3dcf6e874 Mon Sep 17 00:00:00 2001 From: Igor Maravic Date: Sun, 12 Aug 2012 22:31:58 +0000 Subject: net: ipv6: proc: Fix error handling Fix error handling in case making of dir dev_snmp6 failes Signed-off-by: Igor Maravic Signed-off-by: David S. Miller --- net/ipv6/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d05c15..745a32042950 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) goto proc_dev_snmp6_fail; return 0; +proc_dev_snmp6_fail: + proc_net_remove(net, "snmp6"); proc_snmp6_fail: proc_net_remove(net, "sockstat6"); -proc_dev_snmp6_fail: - proc_net_remove(net, "dev_snmp6"); return -ENOMEM; } -- cgit v1.2.1 From 418a99ac6ad487dc9c42e6b0e85f941af56330f2 Mon Sep 17 00:00:00 2001 From: Priyanka Jain Date: Sun, 12 Aug 2012 21:22:29 +0000 Subject: Replace rwlock on xfrm_policy_afinfo with rcu xfrm_policy_afinfo is read mosly data structure. Write on xfrm_policy_afinfo is done only at the time of configuration. So rwlocks can be safely replaced with RCU. RCUs usage optimizes the performance. Signed-off-by: Priyanka Jain Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..5ad4d2c4b83c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -42,13 +42,14 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); +static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); @@ -2418,7 +2419,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock_bh(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -2439,9 +2440,9 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = xfrm_garbage_collect_deferred; - xfrm_policy_afinfo[afinfo->family] = afinfo; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock_bh(&xfrm_policy_afinfo_lock); rtnl_lock(); for_each_net(net) { @@ -2474,13 +2475,14 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock_bh(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else { struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], + NULL); dst_ops->kmem_cachep = NULL; dst_ops->check = NULL; dst_ops->negative_advice = NULL; @@ -2488,7 +2490,8 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) afinfo->garbage_collect = NULL; } } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock_bh(&xfrm_policy_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -2497,16 +2500,16 @@ static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - read_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[AF_INET]; + rcu_read_lock_bh(); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; #if IS_ENABLED(CONFIG_IPV6) - afinfo = xfrm_policy_afinfo[AF_INET6]; + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; #endif - read_unlock_bh(&xfrm_policy_afinfo_lock); + rcu_read_unlock_bh(); } static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) @@ -2514,16 +2517,16 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_unlock(); return afinfo; } -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) { - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_unlock(); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) -- cgit v1.2.1 From ad5b310228da567e35a2ea5dcb2fd62e3a36654e Mon Sep 17 00:00:00 2001 From: Igor Maravic Date: Mon, 13 Aug 2012 10:26:08 +0200 Subject: net: ipv4: fib_trie: Don't unnecessarily search for already found fib leaf We've already found leaf, don't search for it again. Same is for fib leaf info. Signed-off-by: Igor Maravic Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f84a0e90d675..4587d344046d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1656,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!l) return -ESRCH; - fa_head = get_fa_head(l, plen); + li = find_leaf_info(l, plen); + + if (!li) + return -ESRCH; + + fa_head = &li->falh; fa = fib_find_alias(fa_head, tos, 0); if (!fa) @@ -1692,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); - l = fib_find_node(t, key); - li = find_leaf_info(l, plen); - list_del_rcu(&fa->fa_list); if (!plen) -- cgit v1.2.1 From 6024935f5ff5f1646bce8404416318e5fd4a0c4a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:49:59 +0000 Subject: llc2: Fix silent failure of llc_station_init() llc_station_init() creates and processes an event skb with no effect other than to change the state from DOWN to UP. Allocation failure is reported, but then ignored by its caller, llc2_init(). Remove this possibility by simply initialising the state as UP. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 6828e39ec2ec..45ddbb93c5d0 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb) llc_station_state_process(skb); } -int __init llc_station_init(void) +void __init llc_station_init(void) { - int rc = -ENOBUFS; - struct sk_buff *skb; - struct llc_station_state_ev *ev; - skb_queue_head_init(&llc_main_station.mac_pdu_q); skb_queue_head_init(&llc_main_station.ev_q.list); spin_lock_init(&llc_main_station.ev_q.lock); @@ -700,20 +696,9 @@ int __init llc_station_init(void) (unsigned long)&llc_main_station); llc_main_station.ack_timer.expires = jiffies + sysctl_llc_station_ack_timeout; - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) - goto out; - rc = 0; llc_set_station_handler(llc_station_rcv); - ev = llc_station_ev(skb); - memset(ev, 0, sizeof(*ev)); llc_main_station.maximum_retry = 1; - llc_main_station.state = LLC_STATION_STATE_DOWN; - ev->type = LLC_STATION_EV_TYPE_SIMPLE; - ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK; - rc = llc_station_next_state(skb); -out: - return rc; + llc_main_station.state = LLC_STATION_STATE_UP; } void __exit llc_station_exit(void) -- cgit v1.2.1 From f4f8720febf0d785a054fc09bde5e3ad09728a58 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:50:43 +0000 Subject: llc2: Call llc_station_exit() on llc2_init() failure path Otherwise the station packet handler will remain registered even though the module is unloaded. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/af_llc.c | 5 +++-- net/llc/llc_station.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f6fe4d400502..8c2919ca36f6 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1206,7 +1206,7 @@ static int __init llc2_init(void) rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); - goto out_unregister_llc_proto; + goto out_station; } rc = llc_sysctl_init(); if (rc) { @@ -1226,7 +1226,8 @@ out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); -out_unregister_llc_proto: +out_station: + llc_station_exit(); proto_unregister(&llc_proto); goto out; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 45ddbb93c5d0..bba5184fafd7 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -701,7 +701,7 @@ void __init llc_station_init(void) llc_main_station.state = LLC_STATION_STATE_UP; } -void __exit llc_station_exit(void) +void llc_station_exit(void) { llc_set_station_handler(NULL); } -- cgit v1.2.1 From aadf31de16a7b2878af00a02e6557df84efa784b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:50:55 +0000 Subject: llc: Fix races between llc2 handler use and (un)registration When registering the handlers, any state they rely on must be completely initialised first. When unregistering, we must wait until they are definitely no longer running. llc_rcv() must also avoid reading the handler pointers again after checking for NULL. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_input.c | 21 +++++++++++++++++---- net/llc/llc_station.c | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index e32cab44ea95..dd3e83328ad5 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap, void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, struct sk_buff *skb)) { + smp_wmb(); /* ensure initialisation is complete before it's called */ if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = handler; } @@ -50,11 +51,19 @@ void llc_remove_pack(int type) { if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = NULL; + synchronize_net(); } void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) { + /* Ensure initialisation is complete before it's called */ + if (handler) + smp_wmb(); + llc_station_handler = handler; + + if (!handler) + synchronize_net(); } /** @@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int dest; int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); if (!net_eq(dev_net(dev), &init_net)) goto drop; @@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); else @@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, if (cskb) rcv(cskb, dev, pt, orig_dev); } - llc_type_handlers[dest - 1](sap, skb); + sap_handler(sap, skb); } llc_sap_put(sap); out: @@ -202,9 +214,10 @@ drop: kfree_skb(skb); goto out; handle_station: - if (!llc_station_handler) + sta_handler = ACCESS_ONCE(llc_station_handler); + if (!sta_handler) goto drop; - llc_station_handler(skb); + sta_handler(skb); goto out; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index bba5184fafd7..b2f2bac2c2a2 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -696,9 +696,9 @@ void __init llc_station_init(void) (unsigned long)&llc_main_station); llc_main_station.ack_timer.expires = jiffies + sysctl_llc_station_ack_timeout; - llc_set_station_handler(llc_station_rcv); llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_UP; + llc_set_station_handler(llc_station_rcv); } void llc_station_exit(void) -- cgit v1.2.1 From 2787b04b6c5e7607510e8248b38b0aeacb5505f6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 13 Aug 2012 05:49:39 +0000 Subject: packet: Introduce net/packet/internal.h header The diag module will need to access some private packet_sock data, so move it to a header in advance. This file will be shared between the af_packet.c and the diag.c Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/af_packet.c | 99 +---------------------------------------------- net/packet/internal.h | 103 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 97 deletions(-) create mode 100644 net/packet/internal.h (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..8a1605ae4029 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,8 @@ #include #endif +#include "internal.h" + /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) /* Private packet socket structures. */ -struct packet_mclist { - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; /* identical to struct packet_mreq except it has * a longer address field. */ @@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -/* kbdq - kernel block descriptor queue */ -struct tpacket_kbdq_core { - struct pgv *pkbdq; - unsigned int feature_req_word; - unsigned int hdrlen; - unsigned char reset_pending_on_curr_blk; - unsigned char delete_blk_timer; - unsigned short kactive_blk_num; - unsigned short blk_sizeof_priv; - - /* last_kactive_blk_num: - * trick to see if user-space has caught up - * in order to avoid refreshing timer when every single pkt arrives. - */ - unsigned short last_kactive_blk_num; - - char *pkblk_start; - char *pkblk_end; - int kblk_size; - unsigned int knum_blocks; - uint64_t knxt_seq_num; - char *prev; - char *nxt_offset; - struct sk_buff *skb; - - atomic_t blk_fill_in_prog; - - /* Default is set to 8ms */ -#define DEFAULT_PRB_RETIRE_TOV (8) - - unsigned short retire_blk_tov; - unsigned short version; - unsigned long tov_in_jiffies; - - /* timer to retire an outstanding block */ - struct timer_list retire_blk_timer; -}; - #define PGV_FROM_VMALLOC 1 -struct pgv { - char *buffer; -}; - -struct packet_ring_buffer { - struct pgv *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; - - struct tpacket_kbdq_core prb_bdqc; - atomic_t pending; -}; #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) @@ -269,34 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -struct packet_fanout; -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; - struct packet_ring_buffer rx_ring; - struct packet_ring_buffer tx_ring; - int copy_thresh; - spinlock_t bind_lock; - struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1, - has_vnet_hdr:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; - atomic_t mapped; - enum tpacket_versions tp_version; - unsigned int tp_hdrlen; - unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tstamp; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - #define PACKET_FANOUT_MAX 256 struct packet_fanout { @@ -334,11 +244,6 @@ struct packet_skb_cb { (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ ((x)->kactive_blk_num+1) : 0) -static struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); diff --git a/net/packet/internal.h b/net/packet/internal.h new file mode 100644 index 000000000000..2c5fca28b242 --- /dev/null +++ b/net/packet/internal.h @@ -0,0 +1,103 @@ +#ifndef __PACKET_INTERNAL_H__ +#define __PACKET_INTERNAL_H__ + +struct packet_mclist { + struct packet_mclist *next; + int ifindex; + int count; + unsigned short type; + unsigned short alen; + unsigned char addr[MAX_ADDR_LEN]; +}; + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +struct pgv { + char *buffer; +}; + +struct packet_ring_buffer { + struct pgv *pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + struct tpacket_kbdq_core prb_bdqc; + atomic_t pending; +}; + +struct packet_fanout; +struct packet_sock { + /* struct sock has to be the first member of packet_sock */ + struct sock sk; + struct packet_fanout *fanout; + struct tpacket_stats stats; + union tpacket_stats_u stats_u; + struct packet_ring_buffer rx_ring; + struct packet_ring_buffer tx_ring; + int copy_thresh; + spinlock_t bind_lock; + struct mutex pg_vec_lock; + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1, + origdev:1, + has_vnet_hdr:1; + int ifindex; /* bound device */ + __be16 num; + struct packet_mclist *mclist; + atomic_t mapped; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; + unsigned int tp_loss:1; + unsigned int tp_tstamp; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +static struct packet_sock *pkt_sk(struct sock *sk) +{ + return (struct packet_sock *)sk; +} + +#endif -- cgit v1.2.1 From 96ec6327144e1ac9e6676e34fae8b49c2102fa5a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 13 Aug 2012 05:53:28 +0000 Subject: packet: Diag core and basic socket info dumping The diag module can be built independently from the af_packet.ko one, just like it's done in unix sockets. The core dumping message carries the info available at socket creation time, i.e. family, type and protocol (in the same byte order as shown in the proc file). The socket inode number and cookie is reserved for future per-socket info retrieving. The per-protocol filtering is also reserved for future by requiring the sdiag_protocol to be zero. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/Kconfig | 8 ++++ net/packet/Makefile | 2 + net/packet/diag.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 net/packet/diag.c (limited to 'net') diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 0060e3b396b7..cc55b35f80e5 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,3 +14,11 @@ config PACKET be called af_packet. If unsure, say Y. + +config PACKET_DIAG + tristate "Packet: sockets monitoring interface" + depends on PACKET + default n + ---help--- + Support for PF_PACKET sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/packet/Makefile b/net/packet/Makefile index 81183eabfdec..9df61347a3c3 100644 --- a/net/packet/Makefile +++ b/net/packet/Makefile @@ -3,3 +3,5 @@ # obj-$(CONFIG_PACKET) += af_packet.o +obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o +af_packet_diag-y += diag.o diff --git a/net/packet/diag.c b/net/packet/diag.c new file mode 100644 index 000000000000..ff2f7f5bfb8f --- /dev/null +++ b/net/packet/diag.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + u32 pid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct packet_diag_msg *rp; + const struct packet_sock *po = pkt_sk(sk); + + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + if (!nlh) + return -EMSGSIZE; + + rp = nlmsg_data(nlh); + rp->pdiag_family = AF_PACKET; + rp->pdiag_type = sk->sk_type; + rp->pdiag_num = ntohs(po->num); + rp->pdiag_ino = sk_ino; + sock_diag_save_cookie(sk, rp->pdiag_cookie); + + return nlmsg_end(skb, nlh); +} + +static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int num = 0, s_num = cb->args[0]; + struct packet_diag_req *req; + struct net *net; + struct sock *sk; + struct hlist_node *node; + + net = sock_net(skb->sk); + req = nlmsg_data(cb->nlh); + + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) + goto done; +next: + num++; + } +done: + rcu_read_unlock(); + cb->args[0] = num; + + return skb->len; +} + +static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct packet_diag_req); + struct net *net = sock_net(skb->sk); + struct packet_diag_req *req; + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + req = nlmsg_data(h); + /* Make it possible to support protocol filtering later */ + if (req->sdiag_protocol) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = packet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler packet_diag_handler = { + .family = AF_PACKET, + .dump = packet_diag_handler_dump, +}; + +static int __init packet_diag_init(void) +{ + return sock_diag_register(&packet_diag_handler); +} + +static void __exit packet_diag_exit(void) +{ + sock_diag_unregister(&packet_diag_handler); +} + +module_init(packet_diag_init); +module_exit(packet_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); -- cgit v1.2.1 From 8a360be0c5f8fe2c46f0a524886fa56596534193 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 13 Aug 2012 05:55:46 +0000 Subject: packet: Report more packet sk info via diag module This reports in one rtattr message all the other scalar values, that can be set on a packet socket with setsockopt. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/diag.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'net') diff --git a/net/packet/diag.c b/net/packet/diag.c index ff2f7f5bfb8f..d5bd0372d004 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -7,6 +7,31 @@ #include "internal.h" +static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct packet_diag_info pinfo; + + pinfo.pdi_index = po->ifindex; + pinfo.pdi_version = po->tp_version; + pinfo.pdi_reserve = po->tp_reserve; + pinfo.pdi_copy_thresh = po->copy_thresh; + pinfo.pdi_tstamp = po->tp_tstamp; + + pinfo.pdi_flags = 0; + if (po->running) + pinfo.pdi_flags |= PDI_RUNNING; + if (po->auxdata) + pinfo.pdi_flags |= PDI_AUXDATA; + if (po->origdev) + pinfo.pdi_flags |= PDI_ORIGDEV; + if (po->has_vnet_hdr) + pinfo.pdi_flags |= PDI_VNETHDR; + if (po->tp_loss) + pinfo.pdi_flags |= PDI_LOSS; + + return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { @@ -25,7 +50,15 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag rp->pdiag_ino = sk_ino; sock_diag_save_cookie(sk, rp->pdiag_cookie); + if ((req->pdiag_show & PACKET_SHOW_INFO) && + pdiag_put_info(po, skb)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.2.1 From eea68e2f1a0061e09265992b91fdc0014930ae92 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 13 Aug 2012 05:57:44 +0000 Subject: packet: Report socket mclist info via diag module The info is reported as an array of packet_diag_mclist structures. Each includes not only the directly configured values (index, type, etc), but also the "count". Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/diag.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/packet/diag.c b/net/packet/diag.c index d5bd0372d004..3dda4ecb9a46 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,40 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); } +static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct nlattr *mca; + struct packet_mclist *ml; + + mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + if (!mca) + return -EMSGSIZE; + + rtnl_lock(); + for (ml = po->mclist; ml; ml = ml->next) { + struct packet_diag_mclist *dml; + + dml = nla_reserve_nohdr(nlskb, sizeof(*dml)); + if (!dml) { + rtnl_unlock(); + nla_nest_cancel(nlskb, mca); + return -EMSGSIZE; + } + + dml->pdmc_index = ml->ifindex; + dml->pdmc_type = ml->type; + dml->pdmc_alen = ml->alen; + dml->pdmc_count = ml->count; + BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr)); + memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr)); + } + + rtnl_unlock(); + nla_nest_end(nlskb, mca); + + return 0; +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { @@ -54,6 +89,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_info(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && + pdiag_put_mclist(po, skb)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.1 From 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Aug 2012 08:54:51 +0000 Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock Cong Wang reports that lockdep detected suspicious RCU usage while enabling IPV6 forwarding: [ 1123.310275] =============================== [ 1123.442202] [ INFO: suspicious RCU usage. ] [ 1123.558207] 3.6.0-rc1+ #109 Not tainted [ 1123.665204] ------------------------------- [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section! [ 1123.992320] [ 1123.992320] other info that might help us debug this: [ 1123.992320] [ 1124.307382] [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0 [ 1124.522220] 2 locks held by sysctl/5710: [ 1124.648364] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_trylock+0x15/0x17 [ 1124.882211] #1: (rcu_read_lock){.+.+.+}, at: [] rcu_lock_acquire+0x0/0x29 [ 1125.085209] [ 1125.085209] stack backtrace: [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109 [ 1125.441291] Call Trace: [ 1125.545281] [] lockdep_rcu_suspicious+0x109/0x112 [ 1125.667212] [] rcu_preempt_sleep_check+0x45/0x47 [ 1125.781838] [] __might_sleep+0x1e/0x19b [...] [ 1127.445223] [] call_netdevice_notifiers+0x4a/0x4f [...] [ 1127.772188] [] dev_disable_lro+0x32/0x6b [ 1127.885174] [] dev_forward_change+0x30/0xcb [ 1128.013214] [] addrconf_forward_change+0x85/0xc5 [...] addrconf_forward_change() uses RCU iteration over the netdev list, which is unnecessary since it already holds the RTNL lock. We also cannot reasonably require netdevice notifier functions not to sleep. Reported-by: Cong Wang Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79181819a24f..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) -- cgit v1.2.1 From 57f5d0d1d9f8e59819cb0ab4b707364c54b5b2d1 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:32:54 -0300 Subject: Bluetooth: Remove some functions from being exported Some connection related functions are only used inside hci_conn.c so no need to have them exported. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5ad7da217474..724eea980812 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -59,7 +59,7 @@ static void hci_le_connect_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); } -void hci_acl_connect(struct hci_conn *conn) +static void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct inquiry_entry *ie; @@ -129,7 +129,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); } -void hci_add_sco(struct hci_conn *conn, __u16 handle) +static void hci_add_sco(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_add_sco cp; -- cgit v1.2.1 From 1aef866968223ddfd7268457b642a9233f0b8006 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:32:55 -0300 Subject: Bluetooth: Rename LE and ACL connection functions These names were causing much confusion, so we rename these functions that send HCI commands to be more similar in naming to the actual HCI commands that will be sent. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 724eea980812..c30c507345f8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -30,7 +30,7 @@ #include #include -static void hci_le_connect(struct hci_conn *conn) +static void hci_le_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_create_conn cp; @@ -54,12 +54,12 @@ static void hci_le_connect(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); } -static void hci_le_connect_cancel(struct hci_conn *conn) +static void hci_le_create_connection_cancel(struct hci_conn *conn) { hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); } -static void hci_acl_connect(struct hci_conn *conn) +static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct inquiry_entry *ie; @@ -103,7 +103,7 @@ static void hci_acl_connect(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); } -static void hci_acl_connect_cancel(struct hci_conn *conn) +static void hci_acl_create_connection_cancel(struct hci_conn *conn) { struct hci_cp_create_conn_cancel cp; @@ -245,9 +245,9 @@ static void hci_conn_timeout(struct work_struct *work) case BT_CONNECT2: if (conn->out) { if (conn->type == ACL_LINK) - hci_acl_connect_cancel(conn); + hci_acl_create_connection_cancel(conn); else if (conn->type == LE_LINK) - hci_le_connect_cancel(conn); + hci_le_create_connection_cancel(conn); } break; case BT_CONFIG: @@ -494,7 +494,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, return ERR_PTR(-ENOMEM); le->dst_type = bdaddr_to_le(dst_type); - hci_le_connect(le); + hci_le_create_connection(le); } le->pending_sec_level = sec_level; @@ -518,7 +518,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; - hci_acl_connect(acl); + hci_acl_create_connection(acl); } if (type == ACL_LINK) @@ -771,7 +771,7 @@ void hci_conn_check_pending(struct hci_dev *hdev) conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); if (conn) - hci_acl_connect(conn); + hci_acl_create_connection(conn); hci_dev_unlock(hdev); } -- cgit v1.2.1 From d04aef4cccf203fdfd1716e9ba458060cbab0928 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:32:56 -0300 Subject: Bluetooth: Refactor LE connection into its own function The code that handles LE connection is already quite separated from the rest of the connection procedure, so we can easily put it into its own. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 53 ++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c30c507345f8..0a74399dde5e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -470,6 +470,33 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) } EXPORT_SYMBOL(hci_get_route); +static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, + u8 dst_type, u8 sec_level, u8 auth_type) +{ + struct hci_conn *le; + + le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (!le) { + le = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (le) + return ERR_PTR(-EBUSY); + + le = hci_conn_add(hdev, LE_LINK, dst); + if (!le) + return ERR_PTR(-ENOMEM); + + le->dst_type = bdaddr_to_le(dst_type); + hci_le_create_connection(le); + } + + le->pending_sec_level = sec_level; + le->auth_type = auth_type; + + hci_conn_hold(le); + + return le; +} + /* Create SCO, ACL or LE connection. * Device _must_ be locked */ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -477,33 +504,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, { struct hci_conn *acl; struct hci_conn *sco; - struct hci_conn *le; BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (type == LE_LINK) { - le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); - if (!le) { - le = hci_conn_hash_lookup_state(hdev, LE_LINK, - BT_CONNECT); - if (le) - return ERR_PTR(-EBUSY); - - le = hci_conn_add(hdev, LE_LINK, dst); - if (!le) - return ERR_PTR(-ENOMEM); - - le->dst_type = bdaddr_to_le(dst_type); - hci_le_create_connection(le); - } - - le->pending_sec_level = sec_level; - le->auth_type = auth_type; - - hci_conn_hold(le); - - return le; - } + if (type == LE_LINK) + return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { -- cgit v1.2.1 From db4742756ae2a836618cd5acf599522573589149 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Sat, 28 Jul 2012 22:35:59 -0300 Subject: Bluetooth: Refactor ACL connection into its own function The hci_connect() function was starting to get too complicated to be quickly understood. We can separate the creation of a new ACL connection into its own function. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0a74399dde5e..1d70e9fc7a4c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -497,18 +497,10 @@ static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return le; } -/* Create SCO, ACL or LE connection. - * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u8 dst_type, __u8 sec_level, __u8 auth_type) +static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, + u8 sec_level, u8 auth_type) { struct hci_conn *acl; - struct hci_conn *sco; - - BT_DBG("%s dst %s", hdev->name, batostr(dst)); - - if (type == LE_LINK) - return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { @@ -526,6 +518,26 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, hci_acl_create_connection(acl); } + return acl; +} + +/* Create SCO, ACL or LE connection. + * Device _must_ be locked */ +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, + __u8 dst_type, __u8 sec_level, __u8 auth_type) +{ + struct hci_conn *acl; + struct hci_conn *sco; + + BT_DBG("%s dst %s", hdev->name, batostr(dst)); + + if (type == LE_LINK) + return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); + + acl = hci_connect_acl(hdev, dst, sec_level, auth_type); + if (IS_ERR(acl)) + return acl; + if (type == ACL_LINK) return acl; -- cgit v1.2.1 From b7d839bfff78a01705f3d7b0acd5257dc7b067c9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:32:58 -0300 Subject: Bluetooth: Refactor SCO connection into its own function We can do the same that we did for the other link types, for SCO connections. The only thing that's worth noting is that as SCO links need an ACL link, this functions uses the function that adds an ACL link. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1d70e9fc7a4c..de7df88a396b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -521,29 +521,19 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return acl; } -/* Create SCO, ACL or LE connection. - * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u8 dst_type, __u8 sec_level, __u8 auth_type) +static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, bdaddr_t *dst, + u8 sec_level, u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; - BT_DBG("%s dst %s", hdev->name, batostr(dst)); - - if (type == LE_LINK) - return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); - acl = hci_connect_acl(hdev, dst, sec_level, auth_type); if (IS_ERR(acl)) return acl; - if (type == ACL_LINK) - return acl; - - sco = hci_conn_hash_lookup_ba(hdev, type, dst); + sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst); if (!sco) { - sco = hci_conn_add(hdev, type, dst); + sco = hci_conn_add(hdev, SCO_LINK, dst); if (!sco) { hci_conn_put(acl); return ERR_PTR(-ENOMEM); @@ -572,6 +562,21 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, return sco; } +/* Create SCO, ACL or LE connection. */ +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, + __u8 dst_type, __u8 sec_level, __u8 auth_type) +{ + BT_DBG("%s dst %s", hdev->name, batostr(dst)); + + if (type == LE_LINK) + return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); + + if (type == ACL_LINK) + return hci_connect_acl(hdev, dst, sec_level, auth_type); + + return hci_connect_sco(hdev, dst, sec_level, auth_type); +} + /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { -- cgit v1.2.1 From 4cd2d98340b4f03d5532c30fdaeb451b035429cb Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:32:59 -0300 Subject: Bluetooth: Simplify a the connection type handling Now that we have separate ways of doing connections for each link type, we can do better than an "if" statement to handle each link type. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index de7df88a396b..2e7b7765e0ea 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -568,13 +568,16 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, { BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (type == LE_LINK) + switch (type) { + case LE_LINK: return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); - - if (type == ACL_LINK) + case ACL_LINK: return hci_connect_acl(hdev, dst, sec_level, auth_type); + case SCO_LINK: + return hci_connect_sco(hdev, dst, sec_level, auth_type); + } - return hci_connect_sco(hdev, dst, sec_level, auth_type); + return ERR_PTR(-EINVAL); } /* Check link security requirement */ -- cgit v1.2.1 From e6dd548b9a3c7b3fcdd2fd97880abf7597e8334b Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 27 Jul 2012 19:33:00 -0300 Subject: Bluetooth: Add type information to the hci_connect() debug statement Now that we have a "connect" function for each link type, we should be able to indentify which function is going to be called. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2e7b7765e0ea..98670b1df17b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -566,7 +566,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 dst_type, __u8 sec_level, __u8 auth_type) { - BT_DBG("%s dst %s", hdev->name, batostr(dst)); + BT_DBG("%s dst %s type 0x%x", hdev->name, batostr(dst), type); switch (type) { case LE_LINK: -- cgit v1.2.1 From 61a0cfb008f57ecf7eb28ee762952fb42dc15d15 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 1 Aug 2012 20:34:15 -0300 Subject: Bluetooth: Fix use-after-free bug in SMP If SMP fails, we should always cancel security_timer delayed work. Otherwise, security_timer function may run after l2cap_conn object has been freed. This patch fixes the following warning reported by ODEBUG: WARNING: at lib/debugobjects.c:261 debug_print_object+0x7c/0x8d() Hardware name: Bochs ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x27 Modules linked in: btusb bluetooth Pid: 440, comm: kworker/u:2 Not tainted 3.5.0-rc1+ #4 Call Trace: [] ? free_obj_work+0x4a/0x7f [] warn_slowpath_common+0x7e/0x97 [] warn_slowpath_fmt+0x41/0x43 [] debug_print_object+0x7c/0x8d [] ? __queue_work+0x241/0x241 [] debug_check_no_obj_freed+0x92/0x159 [] slab_free_hook+0x6f/0x77 [] ? l2cap_conn_del+0x148/0x157 [bluetooth] [] kfree+0x59/0xac [] l2cap_conn_del+0x148/0x157 [bluetooth] [] l2cap_recv_frame+0xa77/0xfa4 [bluetooth] [] ? trace_hardirqs_on_caller+0x112/0x1ad [] l2cap_recv_acldata+0xe2/0x264 [bluetooth] [] hci_rx_work+0x235/0x33c [bluetooth] [] ? process_one_work+0x126/0x2fe [] process_one_work+0x185/0x2fe [] ? process_one_work+0x126/0x2fe [] ? lock_acquired+0x1b5/0x1cf [] ? le_scan_work+0x11d/0x11d [bluetooth] [] ? spin_lock_irq+0x9/0xb [] worker_thread+0xcf/0x175 [] ? rescuer_thread+0x175/0x175 [] kthread+0x95/0x9d [] kernel_threadi_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? flush_kthread_worker+0xdb/0xdb [] ? gs_change+0x13/0x13 This bug can be reproduced using hctool lecc or l2test tools and bluetoothd not running. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 901a616c8083..98ffc1b6a6fa 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -267,10 +267,10 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) mgmt_auth_failed(conn->hcon->hdev, conn->dst, hcon->type, hcon->dst_type, reason); - if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) { - cancel_delayed_work_sync(&conn->security_timer); + cancel_delayed_work_sync(&conn->security_timer); + + if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp_chan_destroy(conn); - } } #define JUST_WORKS 0x00 -- cgit v1.2.1 From 09d5d4aa647367eeb42352a2bc9d438b9c703670 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 7 Aug 2012 18:05:04 +0300 Subject: Bluetooth: trivial: Shorten variable scope Make code more clear by moving sk and bt vars to the place where they are actually used. Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/af_bluetooth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 58f9762b339a..1b88fe41f4b1 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -567,8 +567,6 @@ static void bt_seq_stop(struct seq_file *seq, void *v) static int bt_seq_show(struct seq_file *seq, void *v) { - struct sock *sk; - struct bt_sock *bt; struct bt_seq_state *s = seq->private; struct bt_sock_list *l = s->l; bdaddr_t src_baswapped, dst_baswapped; @@ -583,8 +581,8 @@ static int bt_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } else { - sk = sk_entry(v); - bt = bt_sk(sk); + struct sock *sk = sk_entry(v); + struct bt_sock *bt = bt_sk(sk); baswap(&src_baswapped, &bt->src); baswap(&dst_baswapped, &bt->dst); -- cgit v1.2.1 From b2e4f544fddc812d6fe802bab5f600b4b783f45d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2012 16:39:45 -0600 Subject: userns: Convert net/core/scm.c to use kuids and kgids With the existence of kuid_t and kgid_t we can take this further and remove the usage of struct cred altogether, ensuring we don't get cache line misses from reference counts. For now however start simply and do a straight forward conversion I can be certain is correct. In cred_to_ucred use from_kuid_munged and from_kgid_munged as these values are going directly to userspace and we want to use the userspace safe values not -1 when reporting a value that does not map. The earlier conversion that used from_kuid was buggy in that respect. Oops. Cc: Eric Dumazet Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/core/scm.c | 31 +++++++++++++++++++++++-------- net/core/sock.c | 4 ++-- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd68ef4..5472ae7a0657 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -45,12 +45,17 @@ static __inline__ int scm_check_creds(struct ucred *creds) { const struct cred *cred = current_cred(); + kuid_t uid = make_kuid(cred->user_ns, creds->uid); + kgid_t gid = make_kgid(cred->user_ns, creds->gid); + + if (!uid_valid(uid) || !gid_valid(gid)) + return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == cred->uid || creds->uid == cred->euid || - creds->uid == cred->suid) || capable(CAP_SETUID)) && - ((creds->gid == cred->gid || creds->gid == cred->egid || - creds->gid == cred->sgid) || capable(CAP_SETGID))) { + ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || + uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && + ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || + gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { return 0; } return -EPERM; @@ -149,6 +154,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; break; case SCM_CREDENTIALS: + { + kuid_t uid; + kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); @@ -166,22 +174,29 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->pid = pid; } + err = -EINVAL; + uid = make_kuid(current_user_ns(), p->creds.uid); + gid = make_kgid(current_user_ns(), p->creds.gid); + if (!uid_valid(uid) || !gid_valid(gid)) + goto error; + if (!p->cred || - (p->cred->euid != p->creds.uid) || - (p->cred->egid != p->creds.gid)) { + !uid_eq(p->cred->euid, uid) || + !gid_eq(p->cred->egid, gid)) { struct cred *cred; err = -ENOMEM; cred = prepare_creds(); if (!cred) goto error; - cred->uid = cred->euid = p->creds.uid; - cred->gid = cred->egid = p->creds.gid; + cred->uid = cred->euid = uid; + cred->gid = cred->egid = gid; if (p->cred) put_cred(p->cred); p->cred = cred; } break; + } default: goto error; } diff --git a/net/core/sock.c b/net/core/sock.c index 6b654b3ddfda..9c7fe4ff30fc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -868,8 +868,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, if (cred) { struct user_namespace *current_ns = current_user_ns(); - ucred->uid = from_kuid(current_ns, cred->euid); - ucred->gid = from_kgid(current_ns, cred->egid); + ucred->uid = from_kuid_munged(current_ns, cred->euid); + ucred->gid = from_kgid_munged(current_ns, cred->egid); } } EXPORT_SYMBOL_GPL(cred_to_ucred); -- cgit v1.2.1 From d04a48b06d63b6d6e9289ca8a5e6e84ebfe39bfd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2012 17:01:57 -0600 Subject: userns: Convert __dev_set_promiscuity to use kuids in audit logs Cc: Klaus Heinrich Kiwi Cc: Eric Paris Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0cb3fe8d8e72..026bb4a37665 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4492,8 +4492,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; ASSERT_RTNL(); @@ -4525,7 +4525,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current), - uid, gid, + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); } -- cgit v1.2.1 From 976d020150456fccbd34103fd117fab910eed09c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2012 17:16:53 -0600 Subject: userns: Convert sock_i_uid to return a kuid_t Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 9c7fe4ff30fc..5c6a435717e0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1526,12 +1526,12 @@ void sock_edemux(struct sk_buff *skb) } EXPORT_SYMBOL(sock_edemux); -int sock_i_uid(struct sock *sk) +kuid_t sock_i_uid(struct sock *sk) { - int uid; + kuid_t uid; read_lock_bh(&sk->sk_callback_lock); - uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; + uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; read_unlock_bh(&sk->sk_callback_lock); return uid; } -- cgit v1.2.1 From a7cb5a49bf64ba64864ae16a6be028f8b0d3cc06 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 01:10:10 -0600 Subject: userns: Print out socket uids in a user namespace aware fashion. Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Arnaldo Carvalho de Melo Cc: Sridhar Samudrala Acked-by: Vlad Yasevich Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/appletalk/atalk_proc.c | 3 ++- net/ipv4/ping.c | 4 +++- net/ipv4/raw.c | 4 +++- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/udp.c | 4 +++- net/ipv6/raw.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 3 ++- net/ipx/ipx_proc.c | 3 ++- net/key/af_key.c | 2 +- net/llc/llc_proc.c | 2 +- net/packet/af_packet.c | 2 +- net/phonet/socket.c | 6 ++++-- net/sctp/proc.c | 6 ++++-- 14 files changed, 34 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index b5b1a221c242..c30f3a0717fb 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -183,7 +183,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) ntohs(at->dest_net), at->dest_node, at->dest_port, sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6232d476f37e..bee5eeb676f8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -845,7 +845,9 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969ea..f2425785d40a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -992,7 +992,9 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..642be8a4c6a3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2382,7 +2382,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, int uid, int *len) + struct seq_file *f, int i, kuid_t uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); int ttd = req->expires - jiffies; @@ -2399,7 +2399,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), @@ -2450,7 +2450,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sk), + from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..53b89817c008 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2110,7 +2110,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..7af88ef01657 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1251,7 +1251,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..4b5b335ebde1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1803,7 +1803,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -1827,7 +1827,7 @@ static void get_openreq6(struct seq_file *seq, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -1877,7 +1877,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sp), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..bbdff07eebe1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1458,7 +1458,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index f8ba30dfecae..02ff7f2f60d4 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -217,7 +217,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "%08X %08X %02X %03d\n", sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 34e418508a67..0481d4b51476 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3661,7 +3661,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) atomic_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a1839c004357..7b4799cfbf8d 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, - sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), llc->link); out: return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..d147317ce9ea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3846,7 +3846,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 0acc943f713a..b7e982782255 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -612,7 +612,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops), &len); } @@ -796,7 +797,8 @@ static int pn_res_seq_show(struct seq_file *seq, void *v) struct sock *sk = *psk; seq_printf(seq, "%02X %5d %lu%n", - (int) (psk - pnres.sk), sock_i_uid(sk), + (int) (psk - pnres.sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), &len); } seq_printf(seq, "%*s\n", 63 - len, ""); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..dc12febc977a 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -216,7 +216,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, - sock_i_uid(sk), sock_i_ino(sk)); + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk)); sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "\n"); @@ -324,7 +325,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); seq_printf(seq, " "); -- cgit v1.2.1 From 7064d16e162adf8199f0288b694e6af823ed5431 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 10:34:21 -0600 Subject: userns: Use kgids for sysctl_ping_group_range - Store sysctl_ping_group_range as a paire of kgid_t values instead of a pair of gid_t values. - Move the kgid conversion work from ping_init_sock into ipv4_ping_group_range - For invalid cases reset to the default disabled state. With the kgid_t conversion made part of the original value sanitation from userspace understand how the code will react becomes clearer and it becomes possible to set the sysctl ping group range from something other than the initial user namespace. Cc: Vasiliy Kulikov Acked-by: David S. Miller Signed-off-by: Eric W. Biederman --- net/ipv4/ping.c | 18 ++++++------------ net/ipv4/sysctl_net_ipv4.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bee5eeb676f8..8f3d05424a3e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -185,10 +185,10 @@ exit: return sk; } -static void inet_get_ping_group_range_net(struct net *net, gid_t *low, - gid_t *high) +static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, + kgid_t *high) { - gid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.sysctl_ping_group_range; unsigned int seq; do { @@ -203,19 +203,13 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, static int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - gid_t group = current_egid(); - gid_t range[2]; + kgid_t group = current_egid(); struct group_info *group_info = get_current_groups(); int i, j, count = group_info->ngroups; kgid_t low, high; - inet_get_ping_group_range_net(net, range, range+1); - low = make_kgid(&init_user_ns, range[0]); - high = make_kgid(&init_user_ns, range[1]); - if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low)) - return -EACCES; - - if (range[0] <= group && group <= range[1]) + inet_get_ping_group_range_net(net, &low, &high); + if (gid_lte(low, group) && gid_lte(group, high)) return 0; for (i = 0; i < group_info->nblocks; i++) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b5ce96707a3..3e78c79b5586 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -76,9 +76,9 @@ static int ipv4_local_port_range(ctl_table *table, int write, } -static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) +static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { - gid_t *data = table->data; + kgid_t *data = table->data; unsigned int seq; do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -89,12 +89,12 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) +static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) { - gid_t *data = table->data; + kgid_t *data = table->data; write_seqlock(&sysctl_local_ports.lock); - data[0] = range[0]; - data[1] = range[1]; + data[0] = low; + data[1] = high; write_sequnlock(&sysctl_local_ports.lock); } @@ -103,21 +103,33 @@ static int ipv4_ping_group_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct user_namespace *user_ns = current_user_ns(); int ret; - gid_t range[2]; + gid_t urange[2]; + kgid_t low, high; ctl_table tmp = { - .data = &range, - .maxlen = sizeof(range), + .data = &urange, + .maxlen = sizeof(urange), .mode = table->mode, .extra1 = &ip_ping_group_range_min, .extra2 = &ip_ping_group_range_max, }; - inet_get_ping_group_range_table(table, range, range + 1); + inet_get_ping_group_range_table(table, &low, &high); + urange[0] = from_kgid_munged(user_ns, low); + urange[1] = from_kgid_munged(user_ns, high); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - if (write && ret == 0) - set_ping_group_range(table, range); + if (write && ret == 0) { + low = make_kgid(user_ns, urange[0]); + high = make_kgid(user_ns, urange[1]); + if (!gid_valid(low) || !gid_valid(high) || + (urange[1] < urange[0]) || gid_lt(high, low)) { + low = make_kgid(&init_user_ns, 1); + high = make_kgid(&init_user_ns, 0); + } + set_ping_group_range(table, low, high); + } return ret; } @@ -786,7 +798,7 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, - .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), + .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, }, @@ -830,8 +842,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) * Sane defaults - nobody may create ping sockets. * Boot scripts should set this to distro-specific group. */ - net->ipv4.sysctl_ping_group_range[0] = 1; - net->ipv4.sysctl_ping_group_range[1] = 0; + net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); tcp_init_mem(net); -- cgit v1.2.1 From 4f82f45730c68fdaf9b0472495a965188404866e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 10:37:59 -0600 Subject: net ip6 flowlabel: Make owner a union of struct pid * and kuid_t Correct a long standing omission and use struct pid in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_PROCESS. This guarantees we don't have issues when pid wraparound occurs. Use a kuid_t in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_USER to add user namespace support. In /proc/net/ip6_flowlabel capture the current pid namespace when opening the file and release the pid namespace when the file is closed ensuring we print the pid owner value that is meaning to the reader of the file. Similarly use from_kuid_munged to print uid values that are meaningful to the reader of the file. This requires exporting pid_nr_ns so that ipv6 can continue to built as a module. Yoiks what silliness Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/ipv6/ip6_flowlabel.c | 50 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9772fbd8a3f5..c836a6a20a34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { + switch (fl->share) { + case IPV6_FL_S_PROCESS: + put_pid(fl->owner.pid); + break; + } if (fl) { release_net(fl->fl_net); kfree(fl->opt); @@ -394,10 +400,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: - fl->owner = current->pid; + fl->owner.pid = get_task_pid(current, PIDTYPE_PID); break; case IPV6_FL_S_USER: - fl->owner = current_euid(); + fl->owner.uid = current_euid(); break; default: err = -EINVAL; @@ -561,7 +567,10 @@ recheck: err = -EPERM; if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || - fl1->owner != fl->owner) + ((fl1->share == IPV6_FL_S_PROCESS) && + (fl1->owner.pid == fl->owner.pid)) || + ((fl1->share == IPV6_FL_S_USER) && + uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -EINVAL; @@ -621,6 +630,7 @@ done: struct ip6fl_iter_state { struct seq_net_private p; + struct pid_namespace *pid_ns; int bucket; }; @@ -699,6 +709,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); @@ -708,7 +719,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned int)ntohl(fl->label), fl->share, - (int)fl->owner, + ((fl->share == IPV6_FL_S_PROCESS) ? + pid_nr_ns(fl->owner.pid, state->pid_ns) : + ((fl->share == IPV6_FL_S_USER) ? + from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : + 0)), atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, @@ -727,8 +742,29 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); + struct seq_file *seq; + struct ip6fl_iter_state *state; + int err; + + err = seq_open_net(inode, file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); + + if (!err) { + seq = file->private_data; + state = ip6fl_seq_private(seq); + rcu_read_lock(); + state->pid_ns = get_pid_ns(task_active_pid_ns(current)); + rcu_read_unlock(); + } + return err; +} + +static int ip6fl_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + put_pid_ns(state->pid_ns); + return seq_release_net(inode, file); } static const struct file_operations ip6fl_seq_fops = { @@ -736,7 +772,7 @@ static const struct file_operations ip6fl_seq_fops = { .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = ip6fl_seq_release, }; static int __net_init ip6_flowlabel_proc_init(struct net *net) -- cgit v1.2.1 From d13fda8564a67341aad257465cf319bdb2327e33 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 12:55:00 -0600 Subject: userns: Convert net/ax25 to use kuid_t where appropriate Cc: Ralf Baechle Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/ax25/ax25_uid.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index e3c579ba6325..957999e43ff7 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -51,14 +51,14 @@ int ax25_uid_policy; EXPORT_SYMBOL(ax25_uid_policy); -ax25_uid_assoc *ax25_findbyuid(uid_t uid) +ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; struct hlist_node *node; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { - if (ax25_uid->uid == uid) { + if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; break; @@ -84,7 +84,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { - res = ax25_uid->uid; + res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; } } @@ -93,9 +93,14 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) return res; case SIOCAX25ADDUID: + { + kuid_t sax25_kuid; if (!capable(CAP_NET_ADMIN)) return -EPERM; - user = ax25_findbyuid(sax->sax25_uid); + sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid); + if (!uid_valid(sax25_kuid)) + return -EINVAL; + user = ax25_findbyuid(sax25_kuid); if (user) { ax25_uid_put(user); return -EEXIST; @@ -106,7 +111,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) return -ENOMEM; atomic_set(&ax25_uid->refcount, 1); - ax25_uid->uid = sax->sax25_uid; + ax25_uid->uid = sax25_kuid; ax25_uid->call = sax->sax25_call; write_lock(&ax25_uid_lock); @@ -114,7 +119,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) write_unlock(&ax25_uid_lock); return 0; - + } case SIOCAX25DELUID: if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -172,7 +177,9 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) struct ax25_uid_assoc *pt; pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); - seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call)); + seq_printf(seq, "%6d %s\n", + from_kuid_munged(seq_user_ns(seq), pt->uid), + ax2asc(buf, &pt->call)); } return 0; } -- cgit v1.2.1 From 3fbc290540a1ed1a8a076ed8f53bee7a38a9f408 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 17:21:27 -0600 Subject: netlink: Make the sending netlink socket availabe in NETLINK_CB The sending socket of an skb is already available by it's port id in the NETLINK_CB. If you want to know more like to examine the credentials on the sending socket you have to look up the sending socket by it's port id and all of the needed functions and data structures are static inside of af_netlink.c. So do the simple thing and pass the sending socket to the receivers in the NETLINK_CB. I intend to use this to get the user namespace of the sending socket in inet_diag so that I can report uids in the context of the process who opened the socket, the same way I report uids in the contect of the process who opens files. Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/netlink/af_netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969da45b..7cb7867cc369 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -912,7 +912,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, + struct sock *ssk) { int ret; struct netlink_sock *nlk = nlk_sk(sk); @@ -921,6 +922,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) if (nlk->netlink_rcv != NULL) { ret = skb->len; skb_set_owner_r(skb, sk); + NETLINK_CB(skb).ssk = ssk; nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -947,7 +949,7 @@ retry: return PTR_ERR(sk); } if (netlink_is_kernel(sk)) - return netlink_unicast_kernel(sk, skb); + return netlink_unicast_kernel(sk, skb, ssk); if (sk_filter(sk, skb)) { err = skb->len; -- cgit v1.2.1 From d06ca9564350184a19b5aae9ac150f1b1306de29 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 17:58:08 -0600 Subject: userns: Teach inet_diag to work with user namespaces Compute the user namespace of the socket that we are replying to and translate the kuids of reported sockets into that user namespace. Cc: Andrew Vagin Acked-by: David S. Miller Acked-by: Pavel Emelyanov Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/ipv4/inet_diag.c | 21 +++++++++++++++------ net/ipv4/udp_diag.c | 5 ++++- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 570e61f9611f..8bc005b1435f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -69,6 +69,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -124,7 +125,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #endif - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -199,11 +200,12 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, pid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, @@ -256,14 +258,16 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, + struct user_namespace *user_ns, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, skb, r, pid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -311,6 +315,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, + sk_user_ns(NETLINK_CB(in_skb).ssk), NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -551,6 +556,7 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).ssk), NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -591,7 +597,9 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, u32 pid, u32 seq, + struct request_sock *req, + struct user_namespace *user_ns, + u32 pid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -625,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { @@ -702,6 +710,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, + sk_user_ns(NETLINK_CB(cb->skb).ssk), NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 16d0960062be..d2f336ea82ca 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -24,7 +24,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, + return inet_sk_diag_fill(sk, NULL, skb, req, + sk_user_ns(NETLINK_CB(cb->skb).ssk), + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -69,6 +71,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, + sk_user_ns(NETLINK_CB(in_skb).ssk), NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { -- cgit v1.2.1 From 9eea9515cb5f3a4416511ef54b1cc98ca04869a1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 10:42:54 -0600 Subject: userns: nfnetlink_log: Report socket uids in the log sockets user namespace At logging instance creation capture the peer netlink socket's user namespace. Use the captured peer user namespace when reporting socket uids to the peer. The peer socket's user namespace is guaranateed to be valid until the user closes the netlink socket. nfnetlink_log removes instances during the final close of a socket. __build_packet_message does not get called after an instance is destroyed. Therefore it is safe to let the peer netlink socket take care of the user namespace reference counting for us. Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/netfilter/nfnetlink_log.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 169ab59ed9d4..4142aac17c3c 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -55,6 +55,7 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; + struct user_namespace *peer_user_ns; /* User namespace of the peer process */ int peer_pid; /* PID of the peer process */ /* configurable parameters */ @@ -132,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid) +instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -162,6 +163,7 @@ instance_create(u_int16_t group_num, int pid) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + inst->peer_user_ns = user_ns; inst->peer_pid = pid; inst->group_num = group_num; @@ -503,8 +505,11 @@ __build_packet_message(struct nfulnl_instance *inst, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { struct file *file = skb->sk->sk_socket->file; - __be32 uid = htonl(file->f_cred->fsuid); - __be32 gid = htonl(file->f_cred->fsgid); + __be32 uid = htonl(from_kuid_munged(inst->peer_user_ns, + file->f_cred->fsuid)); + __be32 gid = htonl(from_kgid_munged(inst->peer_user_ns, + file->f_cred->fsgid)); + /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) @@ -783,7 +788,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).pid, + sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; -- cgit v1.2.1 From af4c6641f5ad445fe6d0832da42406dbd9a37ce4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 13:42:45 -0600 Subject: net sched: Pass the skb into change so it can access NETLINK_CB cls_flow.c plays with uids and gids. Unless I misread that code it is possible for classifiers to depend on the specific uid and gid values. Therefore I need to know the user namespace of the netlink socket that is installing the packet classifiers. Pass in the rtnetlink skb so I can access the NETLINK_CB of the passed packet. In particular I want access to sk_user_ns(NETLINK_CB(in_skb).ssk). Pass in not the user namespace but the incomming rtnetlink skb into the the classifier change routines as that is generally the more useful parameter. Cc: Jamal Hadi Salim Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/sched/cls_api.c | 2 +- net/sched/cls_basic.c | 3 ++- net/sched/cls_cgroup.c | 3 ++- net/sched/cls_flow.c | 3 ++- net/sched/cls_fw.c | 3 ++- net/sched/cls_route.c | 3 ++- net/sched/cls_rsvp.h | 3 ++- net/sched/cls_tcindex.c | 3 ++- net/sched/cls_u32.c | 3 ++- 9 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6dd1131f2ec1..dc3ef5aef355 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -319,7 +319,7 @@ replay: } } - err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 590960a22a77..344a11b342e5 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -162,7 +162,8 @@ errout: return err; } -static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, +static int basic_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { int err; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..91de66695b4a 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -151,7 +151,8 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; -static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, +static int cls_cgroup_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ccd08c8dc6a7..ae854f3434b0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -347,7 +347,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static int flow_change(struct tcf_proto *tp, unsigned long base, +static int flow_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8384a4797240..4075a0aef2aa 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -233,7 +233,8 @@ errout: return err; } -static int fw_change(struct tcf_proto *tp, unsigned long base, +static int fw_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 44f405cb9aaf..c10d57bf98f2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -427,7 +427,8 @@ errout: return err; } -static int route4_change(struct tcf_proto *tp, unsigned long base, +static int route4_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 18ab93ec8d7e..494bbb90924a 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -416,7 +416,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -static int rsvp_change(struct tcf_proto *tp, unsigned long base, +static int rsvp_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index fe29420d0b0e..a1293b4ab7a1 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -332,7 +332,8 @@ errout: } static int -tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle, +tcindex_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { struct nlattr *opt = tca[TCA_OPTIONS]; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d45373fb00b9..c7c27bc91b5a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -544,7 +544,8 @@ errout: return err; } -static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, +static int u32_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { -- cgit v1.2.1 From a6c6796c7127de55cfa9bb0cfbb082ec0acd4eab Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 13:49:36 -0600 Subject: userns: Convert cls_flow to work with user namespaces enabled The flow classifier can use uids and gids of the sockets that are transmitting packets and do insert those uids and gids into the packet classification calcuation. I don't fully understand the details but it appears that we can depend on specific uids and gids when making traffic classification decisions. To work with user namespaces enabled map from kuids and kgids into uids and gids in the initial user namespace giving raw integer values the code can play with and depend on. To avoid issues of userspace depending on uids and gids in packet classifiers installed from other user namespaces and getting confused deny all packet classifiers that use uids or gids that are not comming from a netlink socket in the initial user namespace. Cc: Patrick McHardy Cc: Eric Dumazet Cc: Jamal Hadi Salim Cc: Changli Gao Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/sched/cls_flow.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ae854f3434b0..ce82d0cb1b47 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -193,15 +193,19 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_cred->fsuid; + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { + kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; + return from_kuid(&init_user_ns, skuid); + } return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_cred->fsgid; + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { + kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; + return from_kgid(&init_user_ns, skgid); + } return 0; } @@ -387,6 +391,10 @@ static int flow_change(struct sk_buff *in_skb, if (fls(keymask) - 1 > FLOW_KEY_MAX) return -EOPNOTSUPP; + + if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && + sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) + return -EOPNOTSUPP; } err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); -- cgit v1.2.1 From 8c6e2a941ae74d850a7bf0e5b3f4cd567e0f27dc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 15:50:59 -0600 Subject: userns: Convert xt_LOG to print socket kuids and kgids as uids and gids xt_LOG always writes messages via sb_add via printk. Therefore when xt_LOG logs the uid and gid of a socket a packet came from the values should be converted to be in the initial user namespace. Thus making xt_LOG as user namespace safe as possible. Cc: Pablo Neira Ayuso Cc: Patrick McHardy Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/netfilter/xt_LOG.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index ff5f75fddb15..02a2bf49dcbd 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -363,10 +363,12 @@ static void dump_ipv4_packet(struct sbuff *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + const struct cred *cred = skb->sk->sk_socket->file->f_cred; sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } read_unlock_bh(&skb->sk->sk_callback_lock); } @@ -719,10 +721,12 @@ static void dump_ipv6_packet(struct sbuff *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & XT_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + const struct cred *cred = skb->sk->sk_socket->file->f_cred; sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } read_unlock_bh(&skb->sk->sk_callback_lock); } -- cgit v1.2.1 From da7428080a15189c7acd266d514324f2a2e89e14 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 16:26:52 -0600 Subject: userns xt_recent: Specify the owner/group of ip_list_perms in the initial user namespace xt_recent creates a bunch of proc files and initializes their uid and gids to the values of ip_list_uid and ip_list_gid. When initialize those proc files convert those values to kuids so they can continue to reside on the /proc inode. Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: Jan Engelhardt Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/netfilter/xt_recent.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ae2ad1eec8d0..4635c9b00459 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -317,6 +317,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; + kuid_t uid; + kgid_t gid; #endif unsigned int i; int ret = -EINVAL; @@ -372,6 +374,13 @@ static int recent_mt_check(const struct xt_mtchk_param *par, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS + uid = make_kuid(&init_user_ns, ip_list_uid); + gid = make_kgid(&init_user_ns, ip_list_gid); + if (!uid_valid(uid) || !gid_valid(gid)) { + kfree(t); + ret = -EINVAL; + goto out; + } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { @@ -379,8 +388,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, ret = -ENOMEM; goto out; } - pde->uid = ip_list_uid; - pde->gid = ip_list_gid; + pde->uid = uid; + pde->gid = gid; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); -- cgit v1.2.1 From 26711a791effbea125fea4284f4d1c4fa8f7bc73 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Feb 2012 17:33:59 -0800 Subject: userns: xt_owner: Add basic user namespace support. - Only allow adding matches from the initial user namespace - Add the appropriate conversion functions to handle matches against sockets in other user namespaces. Cc: Jan Engelhardt Cc: Patrick McHardy Cc: Pablo Neira Ayuso Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/netfilter/xt_owner.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 772d7389b337..ca2e577ed8ac 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -17,6 +17,17 @@ #include #include +static int owner_check(const struct xt_mtchk_param *par) +{ + struct xt_owner_match_info *info = par->matchinfo; + + /* For now only allow adding matches from the initial user namespace */ + if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && + (current_user_ns() != &init_user_ns)) + return -EINVAL; + return 0; +} + static bool owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -37,17 +48,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; - if (info->match & XT_OWNER_UID) - if ((filp->f_cred->fsuid >= info->uid_min && - filp->f_cred->fsuid <= info->uid_max) ^ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); + kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + if ((uid_gte(filp->f_cred->fsuid, uid_min) && + uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) return false; + } - if (info->match & XT_OWNER_GID) - if ((filp->f_cred->fsgid >= info->gid_min && - filp->f_cred->fsgid <= info->gid_max) ^ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); + kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + if ((gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) return false; + } return true; } @@ -56,6 +73,7 @@ static struct xt_match owner_mt_reg __read_mostly = { .name = "owner", .revision = 1, .family = NFPROTO_UNSPEC, + .checkentry = owner_check, .match = owner_mt, .matchsize = sizeof(struct xt_owner_match_info), .hooks = (1 << NF_INET_LOCAL_OUT) | -- cgit v1.2.1 From f1f4376307ca45558eb22487022aefceed7385e8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:39:38 +0000 Subject: sctp: Make the port hash table use struct net in it's key. - Add struct net into the port hash table hash calculation - Add struct net inot the struct sctp_bind_bucket so there is a memory of which network namespace a port is allocated in. No need for a ref count because sctp_bind_bucket only exists when there are sockets in the hash table and sockets can not change their network namspace, and sockets already ref count their network namespace. - Add struct net into the key comparison when we are testing to see if we have found the port hash table entry we are looking for. With these changes lookups in the port hash table becomes safe to use in multiple network namespaces. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5e259817a7f3..4316b0f988d4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5769,7 +5769,7 @@ static void sctp_unhash(struct sock *sk) * a fastreuse flag (FIXME: NPI ipg). */ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum); + struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { @@ -5799,11 +5799,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover = low; if (inet_is_reserved_local_port(rover)) continue; - index = sctp_phashfn(rover); + index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) - if (pp->port == rover) + if ((pp->port == rover) && + net_eq(sock_net(sk), pp->net)) goto next; break; next: @@ -5827,10 +5828,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) * to the port number (snum) - we detect that with the * port iterator, pp being NULL. */ - head = &sctp_port_hashtable[sctp_phashfn(snum)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) { - if (pp->port == snum) + if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } } @@ -5881,7 +5882,7 @@ pp_found: pp_not_found: /* If there was a hash table miss, create a new port. */ ret = 1; - if (!pp && !(pp = sctp_bucket_create(head, snum))) + if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum))) goto fail_unlock; /* In either case (hit or miss), make sure fastreuse is 1 only @@ -6113,7 +6114,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) ********************************************************************/ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum) + struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum) { struct sctp_bind_bucket *pp; @@ -6123,6 +6124,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( pp->port = snum; pp->fastreuse = 0; INIT_HLIST_HEAD(&pp->owner); + pp->net = net; hlist_add_head(&pp->node, &head->chain); } return pp; @@ -6142,7 +6144,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; + &sctp_port_hashtable[sctp_phashfn(sock_net(sk), + inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); @@ -6809,7 +6812,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), + inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; -- cgit v1.2.1 From 4cdadcbcb64bdf3ae8bdf3ef5bb2b91c85444cfa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:40:21 +0000 Subject: sctp: Make the endpoint hashtable handle multiple network namespaces - Use struct net in the hash calculation - Use sock_net(endpoint.base.sk) in the endpoint lookups. - On receive calculate the network namespace from skb->dev. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 4 +++- net/sctp/input.c | 19 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 68a385d7c3bd..50c87b4ad765 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -302,11 +302,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, const union sctp_addr *laddr) { struct sctp_endpoint *retval = NULL; - if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { + if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && + net_eq(sock_net(ep->base.sk), net)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; diff --git a/net/sctp/input.c b/net/sctp/input.c index e64d5210ed13..c0ca893ab1d1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -70,7 +70,8 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp); -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr); +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr); static struct sctp_association *__sctp_lookup_association( const union sctp_addr *local, const union sctp_addr *peer, @@ -129,6 +130,7 @@ int sctp_rcv(struct sk_buff *skb) union sctp_addr dest; int family; struct sctp_af *af; + struct net *net = dev_net(skb->dev); if (skb->pkt_type!=PACKET_HOST) goto discard_it; @@ -181,7 +183,7 @@ int sctp_rcv(struct sk_buff *skb) asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(&dest); + ep = __sctp_rcv_lookup_endpoint(net, &dest); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; @@ -723,12 +725,13 @@ discard: /* Insert endpoint into the hash table. */ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); @@ -747,12 +750,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep) /* Remove endpoint from the hash table. */ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; @@ -770,7 +774,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep) } /* Look up an endpoint. */ -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr) +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr) { struct sctp_hashbucket *head; struct sctp_ep_common *epb; @@ -778,12 +783,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l struct hlist_node *node; int hash; - hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); + hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); - if (sctp_endpoint_is_match(ep, laddr)) + if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; } -- cgit v1.2.1 From 4110cc255ddec59c79fba4d71cdd948d0a382140 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:41:13 +0000 Subject: sctp: Make the association hashtable handle multiple network namespaces - Use struct net in the hash calculation - Use sock_net(association.base.sk) in the association lookups. - On receive calculate the network namespace from skb->dev. - Pass struct net from receive down to the functions that actually do the association lookup. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 4 +++- net/sctp/endpointola.c | 6 +++-- net/sctp/input.c | 64 +++++++++++++++++++++++++++++++------------------- net/sctp/ipv6.c | 3 ++- 4 files changed, 49 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ebaef3ed6065..a3601f35ac15 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1089,13 +1089,15 @@ out: /* Is this the association we are looking for? */ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, + struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_transport *transport; if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && - (htons(asoc->peer.port) == paddr->v4.sin_port)) { + (htons(asoc->peer.port) == paddr->v4.sin_port) && + net_eq(sock_net(asoc->base.sk), net)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); if (!transport) goto out; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 50c87b4ad765..6b763939345b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -345,7 +345,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport = ntohs(paddr->v4.sin_port); - hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); + hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port, + rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { @@ -388,13 +389,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, { struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; + struct net *net = sock_net(ep->base.sk); bp = &ep->base.bind_addr; /* This function is called with the socket lock held, * so the address_list can not change. */ list_for_each_entry(addr, &bp->address_list, list) { - if (sctp_has_association(&addr->a, paddr)) + if (sctp_has_association(net, &addr->a, paddr)) return 1; } diff --git a/net/sctp/input.c b/net/sctp/input.c index c0ca893ab1d1..a7e9a85b5acb 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -66,13 +66,15 @@ /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp); static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, const union sctp_addr *laddr); static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt); @@ -180,7 +182,7 @@ int sctp_rcv(struct sk_buff *skb) !af->addr_valid(&dest, NULL, skb)) goto discard_it; - asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); + asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); if (!asoc) ep = __sctp_rcv_lookup_endpoint(net, &dest); @@ -476,7 +478,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } /* Common lookup code for icmp/icmpv6 error handler. */ -struct sock *sctp_err_lookup(int family, struct sk_buff *skb, +struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctphdr *sctphdr, struct sctp_association **app, struct sctp_transport **tpp) @@ -505,7 +507,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, /* Look for an association that matches the incoming ICMP error * packet. */ - asoc = __sctp_lookup_association(&saddr, &daddr, &transport); + asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport); if (!asoc) return NULL; @@ -588,6 +590,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct inet_sock *inet; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); if (skb->len < ihlen + 8) { ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); @@ -599,7 +602,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, ihlen); - sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original values. */ skb->network_header = saveip; skb->transport_header = savesctp; @@ -803,13 +806,15 @@ hit: /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &asoc->base; /* Calculate which chain this entry will belong to. */ - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port); + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, + asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -832,12 +837,13 @@ void sctp_hash_established(struct sctp_association *asoc) /* Remove association from the hash table. */ static void __sctp_unhash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &asoc->base; - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -860,6 +866,7 @@ void sctp_unhash_established(struct sctp_association *asoc) /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt) @@ -874,12 +881,13 @@ static struct sctp_association *__sctp_lookup_association( /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); + hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port), + ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { asoc = sctp_assoc(epb); - transport = sctp_assoc_is_match(asoc, local, peer); + transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) goto hit; } @@ -897,27 +905,29 @@ hit: /* Look up an association. BH-safe. */ SCTP_STATIC -struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr, +struct sctp_association *sctp_lookup_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp) { struct sctp_association *asoc; sctp_local_bh_disable(); - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); sctp_local_bh_enable(); return asoc; } /* Is there an association matching the given local and peer addresses? */ -int sctp_has_association(const union sctp_addr *laddr, +int sctp_has_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_association *asoc; struct sctp_transport *transport; - if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) { + if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { sctp_association_put(asoc); return 1; } @@ -943,7 +953,8 @@ int sctp_has_association(const union sctp_addr *laddr, * in certain circumstances. * */ -static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; @@ -983,7 +994,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, &transport); if (asoc) return asoc; } @@ -1006,6 +1017,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, * subsequent ASCONF Chunks. If found, proceed to rule D4. */ static struct sctp_association *__sctp_rcv_asconf_lookup( + struct net *net, sctp_chunkhdr_t *ch, const union sctp_addr *laddr, __be16 peer_port, @@ -1025,7 +1037,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( af->from_addr_param(&paddr, param, peer_port, 0); - return __sctp_lookup_association(laddr, &paddr, transportp); + return __sctp_lookup_association(net, laddr, &paddr, transportp); } @@ -1038,7 +1050,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( * This means that any chunks that can help us identify the association need * to be looked at to find this association. */ -static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1080,7 +1093,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, case SCTP_CID_ASCONF: if (have_auth || sctp_addip_noauth) - asoc = __sctp_rcv_asconf_lookup(ch, laddr, + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, sctp_hdr(skb)->source, transportp); default: @@ -1103,7 +1117,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, * include looking inside of INIT/INIT-ACK chunks or after the AUTH * chunks. */ -static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1123,11 +1138,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, switch (ch->type) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: - return __sctp_rcv_init_lookup(skb, laddr, transportp); + return __sctp_rcv_init_lookup(net, skb, laddr, transportp); break; default: - return __sctp_rcv_walk_lookup(skb, laddr, transportp); + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); break; } @@ -1136,21 +1151,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, } /* Lookup an association for an inbound skb. */ -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); /* Further lookup for INIT/INIT-ACK packets. * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. */ if (!asoc) - asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp); return asoc; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ed7139ea7978..2165a7ed25f1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -154,6 +154,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_pinfo *np; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); idev = in6_dev_get(skb->dev); @@ -162,7 +163,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, offset); - sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original pointers. */ skb->network_header = saveip; skb->transport_header = savesctp; -- cgit v1.2.1 From 4db67e808640e3934d82ce61ee8e2e89fd877ba8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:42:04 +0000 Subject: sctp: Make the address lists per network namespace - Move the address lists into struct net - Add per network namespace initialization and cleanup - Pass around struct net so it is everywhere I need it. - Rename all of the global variable references into references to the variables moved into struct net Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 3 +- net/sctp/bind_addr.c | 14 ++--- net/sctp/ipv6.c | 17 +++--- net/sctp/protocol.c | 143 +++++++++++++++++++++++++++++---------------------- net/sctp/socket.c | 7 +-- 5 files changed, 104 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a3601f35ac15..ed4930b31341 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1544,7 +1544,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, if (asoc->peer.ipv6_address) flags |= SCTP_ADDR6_PEERSUPP; - return sctp_bind_addr_copy(&asoc->base.bind_addr, + return sctp_bind_addr_copy(sock_net(asoc->base.sk), + &asoc->base.bind_addr, &asoc->ep->base.bind_addr, scope, gfp, flags); } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 4ece451c8d27..a85ce4b3e574 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -52,8 +52,8 @@ #include /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, gfp_t gfp, +static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, + union sctp_addr *, sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses * in 'src' which have a broader scope than 'scope'. */ -int sctp_bind_addr_copy(struct sctp_bind_addr *dest, +int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, sctp_scope_t scope, gfp_t gfp, int flags) @@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, /* Extract the addresses which are relevant for this scope. */ list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, scope, + error = sctp_copy_one_addr(net, dest, &addr->a, scope, gfp, flags); if (error < 0) goto out; @@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, */ if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) { list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, + error = sctp_copy_one_addr(net, dest, &addr->a, SCTP_SCOPE_LINK, gfp, flags); if (error < 0) @@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, } /* Copy out addresses from the global local address list. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *dest, +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, union sctp_addr *addr, sctp_scope_t scope, gfp_t gfp, int flags) @@ -456,7 +456,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, int error = 0; if (sctp_is_any(NULL, addr)) { - error = sctp_copy_local_addr_list(dest, scope, gfp, flags); + error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if * the address type is supported by local sock as diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2165a7ed25f1..bbf15341eb2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { @@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1f89c4e69645..7025d96bae5f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -201,29 +201,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(struct net *net) { struct net_device *dev; struct list_head *pos; struct sctp_af *af; rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); - af->copy_addrlist(&sctp_local_addr_list, dev); + af->copy_addrlist(&net->sctp.local_addr_list, dev); } } rcu_read_unlock(); } /* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(struct net *net) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &net->sctp.local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); list_del(pos); kfree(addr); @@ -231,14 +231,14 @@ static void sctp_free_local_addr_list(void) } /* Copy the local addresses which are valid for 'scope' into 'bp'. */ -int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - gfp_t gfp, int copy_flags) +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, + sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; if (sctp_in_scope(&addr->a, scope)) { @@ -627,14 +627,15 @@ static void sctp_v4_ecn_capable(struct sock *sk) void sctp_addr_wq_timeout_handler(unsigned long arg) { + struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) @@ -648,7 +649,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) goto free_next; in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; - if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + if (ipv6_chk_addr(net, in6, NULL, 0) == 0 && addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; @@ -656,12 +657,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) SCTP_ADDRESS_TICK_DELAY); timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); break; } } #endif - list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) { struct sock *sk; sk = sctp_opt2sk(sp); @@ -679,31 +680,32 @@ free_next: list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } -static void sctp_free_addr_wq(void) +static void sctp_free_addr_wq(struct net *net) { struct sctp_sockaddr_entry *addrw; struct sctp_sockaddr_entry *temp; - spin_lock_bh(&sctp_addr_wq_lock); - del_timer(&sctp_addr_wq_timer); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + spin_lock_bh(&net->sctp.addr_wq_lock); + del_timer(&net->sctp.addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* lookup the entry for the same address in the addr_waitq * sctp_addr_wq MUST be locked */ -static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net, + struct sctp_sockaddr_entry *addr) { struct sctp_sockaddr_entry *addrw; - list_for_each_entry(addrw, &sctp_addr_waitq, list) { + list_for_each_entry(addrw, &net->sctp.addr_waitq, list) { if (addrw->a.sa.sa_family != addr->a.sa.sa_family) continue; if (addrw->a.sa.sa_family == AF_INET) { @@ -719,7 +721,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr return NULL; } -void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd) { struct sctp_sockaddr_entry *addrw; unsigned long timeo_val; @@ -730,38 +732,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) * new address after a couple of addition and deletion of that address */ - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); /* Offsets existing events in addr_wq */ - addrw = sctp_addr_wq_lookup(addr); + addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", " in wq %p\n", addrw->state, &addrw->a, - &sctp_addr_waitq); + &net->sctp.addr_waitq); list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } /* OK, we have to add the new address to the wait queue */ addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); if (addrw == NULL) { - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } addrw->state = cmd; - list_add_tail(&addrw->list, &sctp_addr_waitq); + list_add_tail(&addrw->list, &net->sctp.addr_waitq); SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); - if (!timer_pending(&sctp_addr_wq_timer)) { + if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* Event handler for inet address addition/deletion events. @@ -776,11 +778,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->ifa_dev->dev); int found = 0; - if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) - return NOTIFY_DONE; - switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); @@ -789,27 +789,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -1194,6 +1194,36 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } +static int sctp_net_init(struct net *net) +{ + /* Initialize the local address list. */ + INIT_LIST_HEAD(&net->sctp.local_addr_list); + spin_lock_init(&net->sctp.local_addr_lock); + sctp_get_local_addr_list(net); + + /* Initialize the address event list */ + INIT_LIST_HEAD(&net->sctp.addr_waitq); + INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); + spin_lock_init(&net->sctp.addr_wq_lock); + net->sctp.addr_wq_timer.expires = 0; + setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, + (unsigned long)net); + + return 0; +} + +static void sctp_net_exit(struct net *net) +{ + /* Free the local address list */ + sctp_free_addr_wq(net); + sctp_free_local_addr_list(net); +} + +static struct pernet_operations sctp_net_ops = { + .init = sctp_net_init, + .exit = sctp_net_exit, +}; + /* Initialize the universe into something sensible. */ SCTP_STATIC __init int sctp_init(void) { @@ -1399,18 +1429,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_v4_pf_init(); sctp_v6_pf_init(); - /* Initialize the local address list. */ - INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); - sctp_get_local_addr_list(); - - /* Initialize the address event list */ - INIT_LIST_HEAD(&sctp_addr_waitq); - INIT_LIST_HEAD(&sctp_auto_asconf_splist); - spin_lock_init(&sctp_addr_wq_lock); - sctp_addr_wq_timer.expires = 0; - setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); - status = sctp_v4_protosw_init(); if (status) @@ -1426,6 +1444,10 @@ SCTP_STATIC __init int sctp_init(void) goto err_ctl_sock_init; } + status = register_pernet_subsys(&sctp_net_ops); + if (status) + goto err_register_pernet_subsys; + status = sctp_v4_add_protocol(); if (status) goto err_add_protocol; @@ -1441,13 +1463,14 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: + unregister_pernet_subsys(&sctp_net_ops); +err_register_pernet_subsys: inet_ctl_sock_destroy(sctp_ctl_sock); err_ctl_sock_init: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: - sctp_free_local_addr_list(); sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1482,18 +1505,16 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); - sctp_free_addr_wq(); /* Free the control endpoint. */ inet_ctl_sock_destroy(sctp_ctl_sock); + unregister_pernet_subsys(&sctp_net_ops); + /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); - /* Free the local address list. */ - sctp_free_local_addr_list(); - /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4316b0f988d4..5b6dd0e3d1f6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3471,7 +3471,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, sp->do_auto_asconf = 0; } else if (val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } return 0; @@ -3964,7 +3964,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (sctp_default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -4653,9 +4653,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, union sctp_addr temp; int cnt = 0; int addrlen; + struct net *net = sock_net(sk); rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; -- cgit v1.2.1 From 2ce955035081112cf1590c961da8d94324142b5e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:43:06 +0000 Subject: sctp: Make the ctl_sock per network namespace - Kill sctp_get_ctl_sock, it is useless now. - Pass struct net where needed so net->sctp.ctl_sock is accessible. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 4 ++-- net/sctp/protocol.c | 47 +++++++++++++++++++---------------------------- net/sctp/sm_statefuns.c | 45 +++++++++++++++++++++++++++++++-------------- 3 files changed, 52 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index a7e9a85b5acb..c9a0449bde53 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -204,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb) sctp_endpoint_put(ep); ep = NULL; } - sk = sctp_get_ctl_sock(); + sk = net->sctp.ctl_sock; ep = sctp_sk(sk)->ep; sctp_endpoint_hold(ep); rcvr = &ep->base; @@ -795,7 +795,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, goto hit; } - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: sctp_endpoint_hold(ep); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7025d96bae5f..f20bd708e89c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -78,12 +78,6 @@ struct proc_dir_entry *proc_net_sctp; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); -/* This is the global socket data structure used for responding to - * the Out-of-the-blue (OOTB) packets. A control sock will be created - * for this socket at the initialization time. - */ -static struct sock *sctp_ctl_sock; - static struct sctp_pf *sctp_pf_inet6_specific; static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; @@ -96,12 +90,6 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Return the address of the control sock. */ -struct sock *sctp_get_ctl_sock(void) -{ - return sctp_ctl_sock; -} - /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { @@ -822,7 +810,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, * Initialize the control inode/socket with a control endpoint data * structure. This endpoint is reserved exclusively for the OOTB processing. */ -static int sctp_ctl_sock_init(void) +static int sctp_ctl_sock_init(struct net *net) { int err; sa_family_t family = PF_INET; @@ -830,14 +818,14 @@ static int sctp_ctl_sock_init(void) if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - err = inet_ctl_sock_create(&sctp_ctl_sock, family, - SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + err = inet_ctl_sock_create(&net->sctp.ctl_sock, family, + SOCK_SEQPACKET, IPPROTO_SCTP, net); /* If IPv6 socket could not be created, try the IPv4 socket */ if (err < 0 && family == PF_INET6) - err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, - &init_net); + net); if (err < 0) { pr_err("Failed to create the SCTP control socket\n"); @@ -1196,6 +1184,14 @@ static void sctp_v4_del_protocol(void) static int sctp_net_init(struct net *net) { + int status; + + /* Initialize the control inode/socket for handling OOTB packets. */ + if ((status = sctp_ctl_sock_init(net))) { + pr_err("Failed to initialize the SCTP control sock\n"); + goto err_ctl_sock_init; + } + /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1210,6 +1206,9 @@ static int sctp_net_init(struct net *net) (unsigned long)net); return 0; + +err_ctl_sock_init: + return status; } static void sctp_net_exit(struct net *net) @@ -1217,6 +1216,9 @@ static void sctp_net_exit(struct net *net) /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); + + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); } static struct pernet_operations sctp_net_ops = { @@ -1438,12 +1440,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_protosw_init; - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init())) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - status = register_pernet_subsys(&sctp_net_ops); if (status) goto err_register_pernet_subsys; @@ -1465,8 +1461,6 @@ err_v6_add_protocol: err_add_protocol: unregister_pernet_subsys(&sctp_net_ops); err_register_pernet_subsys: - inet_ctl_sock_destroy(sctp_ctl_sock); -err_ctl_sock_init: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); @@ -1506,9 +1500,6 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); sctp_v4_del_protocol(); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(sctp_ctl_sock); - unregister_pernet_subsys(&sctp_net_ops); /* Free protosw registrations */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9fca10357350..f2daf615e8fe 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -74,7 +74,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands); -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk); static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, const struct sctp_association *asoc, @@ -301,6 +302,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, struct sctp_chunk *err_chunk; struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; + struct net *net; int len; /* 6.10 Bundling @@ -318,7 +320,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + net = sock_net(ep->base.sk); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -646,11 +649,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, int error = 0; struct sctp_chunk *err_chk_p; struct sock *sk; + struct net *net; /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + net = sock_net(ep->base.sk); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -1171,7 +1176,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, /* Helper function to send out an abort for the restart * condition. */ -static int sctp_sf_send_restart_abort(union sctp_addr *ssa, +static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { @@ -1197,7 +1202,7 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa, errhdr->length = htons(len); /* Assign to the control socket. */ - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. @@ -1240,6 +1245,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(new_asoc->base.sk); struct sctp_transport *new_addr; int ret = 1; @@ -1258,7 +1264,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, transports) { if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, &new_addr->ipaddr)) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init, commands); ret = 0; break; @@ -1650,10 +1656,11 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(ep->base.sk); /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) return sctp_sf_ootb(ep, asoc, type, arg, commands); else return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); @@ -3163,8 +3170,10 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. The T bit will be set if the asoc @@ -3425,8 +3434,10 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *shut; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an SHUTDOWN_COMPLETE. @@ -4262,6 +4273,7 @@ static sctp_disposition_t sctp_sf_abort_violation( struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort = NULL; + struct net *net; /* SCTP-AUTH, Section 6.3: * It should be noted that if the receiver wants to tear @@ -4282,6 +4294,7 @@ static sctp_disposition_t sctp_sf_abort_violation( if (!abort) goto nomem; + net = sock_net(ep->base.sk); if (asoc) { /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && @@ -4319,7 +4332,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); } } else { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (!packet) goto nomem_pkt; @@ -5825,8 +5838,10 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, { struct sctp_packet *packet; struct sctp_chunk *abort; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. @@ -5858,7 +5873,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, } /* Allocate a packet for responding in the OOTB conditions. */ -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_packet *packet; @@ -5919,7 +5935,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc * the source address. */ sctp_transport_route(transport, (union sctp_addr *)&chunk->dest, - sctp_sk(sctp_get_ctl_sock())); + sctp_sk(net->sctp.ctl_sock)); packet = sctp_packet_init(&transport->packet, transport, sport, dport); packet = sctp_packet_config(packet, vtag, 0); @@ -5946,7 +5962,8 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (err_chunk) { - packet = sctp_ootb_pkt_new(asoc, chunk); + struct net *net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { struct sctp_signed_cookie *cookie; -- cgit v1.2.1 From 632c928a6a77fe96cda34a9978e1f6019ffc38f4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:44:24 +0000 Subject: sctp: Move the percpu sockets counter out of sctp_proc_init The percpu sctp socket counter has nothing at all to do with the sctp proc files, and having it in the wrong initialization is confusing, and makes network namespace support a pain. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f20bd708e89c..48a5989c98ce 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -93,8 +93,6 @@ int sysctl_sctp_wmem[3]; /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { - if (percpu_counter_init(&sctp_sockets_allocated, 0)) - goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); @@ -125,12 +123,9 @@ out_snmp_proc_init: remove_proc_entry("sctp", init_net.proc_net); } out_free_percpu: - percpu_counter_destroy(&sctp_sockets_allocated); #else return 0; #endif /* CONFIG_PROC_FS */ - -out_nomem: return -ENOMEM; } @@ -151,7 +146,6 @@ static void sctp_proc_exit(void) remove_proc_entry("sctp", init_net.proc_net); } #endif - percpu_counter_destroy(&sctp_sockets_allocated); } /* Private helper to extract ipv4 address and stash them in @@ -1261,6 +1255,10 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_init_mibs; + status = percpu_counter_init(&sctp_sockets_allocated, 0); + if (status) + goto err_percpu_counter_init; + /* Initialize proc fs directory. */ status = sctp_proc_init(); if (status) @@ -1481,6 +1479,8 @@ err_ahash_alloc: sctp_dbg_objcnt_exit(); sctp_proc_exit(); err_init_proc: + percpu_counter_destroy(&sctp_sockets_allocated); +err_percpu_counter_init: cleanup_sctp_mibs(); err_init_mibs: kmem_cache_destroy(sctp_chunk_cachep); @@ -1521,6 +1521,7 @@ SCTP_STATIC __exit void sctp_exit(void) sizeof(struct sctp_bind_hashbucket))); sctp_dbg_objcnt_exit(); + percpu_counter_destroy(&sctp_sockets_allocated); sctp_proc_exit(); cleanup_sctp_mibs(); -- cgit v1.2.1 From 13d782f6b4fbbaf9d0380a9947deb45a9de46ae7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:45:15 +0000 Subject: sctp: Make the proc files per network namespace. - Convert all of the files under /proc/net/sctp to be per network namespace. - Don't print anything for /proc/net/sctp/snmp except in the initial network namespaces as the snmp counters still have to be converted to be per network namespace. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/objcnt.c | 8 ++--- net/sctp/proc.c | 59 ++++++++++++++++++++++++------------ net/sctp/protocol.c | 87 +++++++++++++++++++++++------------------------------ 3 files changed, 81 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 8ef8e7d9eb61..fe012c44f8df 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = { }; /* Initialize the objcount in the proc filesystem. */ -void sctp_dbg_objcnt_init(void) +void sctp_dbg_objcnt_init(struct net *net) { struct proc_dir_entry *ent; ent = proc_create("sctp_dbg_objcnt", 0, - proc_net_sctp, &sctp_objcnt_ops); + net->sctp.proc_net_sctp, &sctp_objcnt_ops); if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(void) +void sctp_dbg_objcnt_exit(struct net *net) { - remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp); + remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..dc79a3aa0382 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -80,8 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; + if (!net_eq(net, &init_net)) + return 0; + for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, snmp_fold_field((void __percpu **)sctp_statistics, @@ -93,7 +97,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) /* Initialize the seq file operations for 'snmp' object. */ static int sctp_snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_snmp_seq_show, NULL); + return single_open_net(inode, file, sctp_snmp_seq_show); } static const struct file_operations sctp_snmp_seq_fops = { @@ -105,11 +109,12 @@ static const struct file_operations sctp_snmp_seq_fops = { }; /* Set up the proc fs entry for 'snmp' object. */ -int __init sctp_snmp_proc_init(void) +int __net_init sctp_snmp_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops); + p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops); if (!p) return -ENOMEM; @@ -117,9 +122,9 @@ int __init sctp_snmp_proc_init(void) } /* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(void) +void sctp_snmp_proc_exit(struct net *net) { - remove_proc_entry("snmp", proc_net_sctp); + remove_proc_entry("snmp", net->sctp.proc_net_sctp); } /* Dump local addresses of an association/endpoint. */ @@ -197,6 +202,7 @@ static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp endpoints (/proc/net/sctp/eps). */ static int sctp_eps_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; @@ -213,6 +219,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), priv->net)) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, @@ -238,7 +246,8 @@ static const struct seq_operations sctp_eps_ops = { /* Initialize the seq file operations for 'eps' object. */ static int sctp_eps_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_eps_ops); + return seq_open_net(inode, file, &sctp_eps_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_eps_seq_fops = { @@ -249,11 +258,12 @@ static const struct file_operations sctp_eps_seq_fops = { }; /* Set up the proc fs entry for 'eps' object. */ -int __init sctp_eps_proc_init(void) +int __net_init sctp_eps_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops); + p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops); if (!p) return -ENOMEM; @@ -261,9 +271,9 @@ int __init sctp_eps_proc_init(void) } /* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(void) +void sctp_eps_proc_exit(struct net *net) { - remove_proc_entry("eps", proc_net_sctp); + remove_proc_entry("eps", net->sctp.proc_net_sctp); } @@ -300,6 +310,7 @@ static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -316,6 +327,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), priv->net)) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " "%4d %8d %8d %7d %5lu %-5d %5d ", @@ -354,7 +367,8 @@ static const struct seq_operations sctp_assoc_ops = { /* Initialize the seq file operations for 'assocs' object. */ static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_assoc_ops); + return seq_open_net(inode, file, &sctp_assoc_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -365,11 +379,11 @@ static const struct file_operations sctp_assocs_seq_fops = { }; /* Set up the proc fs entry for 'assocs' object. */ -int __init sctp_assocs_proc_init(void) +int __net_init sctp_assocs_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("assocs", S_IRUGO, proc_net_sctp, + p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, &sctp_assocs_seq_fops); if (!p) return -ENOMEM; @@ -378,9 +392,9 @@ int __init sctp_assocs_proc_init(void) } /* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(void) +void sctp_assocs_proc_exit(struct net *net) { - remove_proc_entry("assocs", proc_net_sctp); + remove_proc_entry("assocs", net->sctp.proc_net_sctp); } static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) @@ -412,6 +426,7 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -426,6 +441,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { + if (!net_eq(sock_net(epb->sk), priv->net)) + continue; assoc = sctp_assoc(epb); list_for_each_entry(tsp, &assoc->peer.transport_addr_list, transports) { @@ -489,14 +506,15 @@ static const struct seq_operations sctp_remaddr_ops = { }; /* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(void) +void sctp_remaddr_proc_exit(struct net *net) { - remove_proc_entry("remaddr", proc_net_sctp); + remove_proc_entry("remaddr", net->sctp.proc_net_sctp); } static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_remaddr_ops); + return seq_open_net(inode, file, &sctp_remaddr_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_remaddr_seq_fops = { @@ -506,11 +524,12 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release, }; -int __init sctp_remaddr_proc_init(void) +int __net_init sctp_remaddr_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops); + p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops); if (!p) return -ENOMEM; return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 48a5989c98ce..de7994edb4ca 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -71,10 +71,6 @@ struct sctp_globals sctp_globals __read_mostly; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_sctp; -#endif - struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -91,60 +87,52 @@ int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; /* Set up the proc fs entry for the SCTP protocol. */ -static __init int sctp_proc_init(void) +static __net_init int sctp_proc_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_net_sctp) { - proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); - if (!proc_net_sctp) - goto out_free_percpu; - } - - if (sctp_snmp_proc_init()) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) + goto out_proc_net_sctp; + if (sctp_snmp_proc_init(net)) goto out_snmp_proc_init; - if (sctp_eps_proc_init()) + if (sctp_eps_proc_init(net)) goto out_eps_proc_init; - if (sctp_assocs_proc_init()) + if (sctp_assocs_proc_init(net)) goto out_assocs_proc_init; - if (sctp_remaddr_proc_init()) + if (sctp_remaddr_proc_init(net)) goto out_remaddr_proc_init; return 0; out_remaddr_proc_init: - sctp_assocs_proc_exit(); + sctp_assocs_proc_exit(net); out_assocs_proc_init: - sctp_eps_proc_exit(); + sctp_eps_proc_exit(net); out_eps_proc_init: - sctp_snmp_proc_exit(); + sctp_snmp_proc_exit(net); out_snmp_proc_init: - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } -out_free_percpu: -#else - return 0; -#endif /* CONFIG_PROC_FS */ + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +out_proc_net_sctp: return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; } /* Clean up the proc fs entry for the SCTP protocol. * Note: Do not make this __exit as it is used in the init error * path. */ -static void sctp_proc_exit(void) +static void sctp_proc_exit(struct net *net) { #ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(); - sctp_eps_proc_exit(); - sctp_assocs_proc_exit(); - sctp_remaddr_proc_exit(); - - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } + sctp_snmp_proc_exit(net); + sctp_eps_proc_exit(net); + sctp_assocs_proc_exit(net); + sctp_remaddr_proc_exit(net); + + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; #endif } @@ -1180,6 +1168,13 @@ static int sctp_net_init(struct net *net) { int status; + /* Initialize proc fs directory. */ + status = sctp_proc_init(net); + if (status) + goto err_init_proc; + + sctp_dbg_objcnt_init(net); + /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init(net))) { pr_err("Failed to initialize the SCTP control sock\n"); @@ -1202,6 +1197,9 @@ static int sctp_net_init(struct net *net) return 0; err_ctl_sock_init: + sctp_dbg_objcnt_exit(net); + sctp_proc_exit(net); +err_init_proc: return status; } @@ -1213,6 +1211,10 @@ static void sctp_net_exit(struct net *net) /* Free the control endpoint. */ inet_ctl_sock_destroy(net->sctp.ctl_sock); + + sctp_dbg_objcnt_exit(net); + + sctp_proc_exit(net); } static struct pernet_operations sctp_net_ops = { @@ -1259,14 +1261,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_percpu_counter_init; - /* Initialize proc fs directory. */ - status = sctp_proc_init(); - if (status) - goto err_init_proc; - - /* Initialize object count debugging. */ - sctp_dbg_objcnt_init(); - /* * 14. Suggested SCTP Protocol Parameter Values */ @@ -1476,9 +1470,6 @@ err_ehash_alloc: get_order(sctp_assoc_hashsize * sizeof(struct sctp_hashbucket))); err_ahash_alloc: - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); -err_init_proc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: cleanup_sctp_mibs(); @@ -1520,9 +1511,7 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - sctp_dbg_objcnt_exit(); percpu_counter_destroy(&sctp_sockets_allocated); - sctp_proc_exit(); cleanup_sctp_mibs(); rcu_barrier(); /* Wait for completion of call_rcu()'s */ -- cgit v1.2.1 From bb2db45b5495455ec7580315029184550709f4a2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:46:26 +0000 Subject: sctp: Enable sctp in all network namespaces - Fix the sctp_af operations to work in all namespaces - Enable sctp socket creation in all network namespaces. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 12 ++++++------ net/sctp/protocol.c | 8 +++++--- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bbf15341eb2b..a18cda60e156 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -582,7 +582,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; - return ipv6_chk_addr(&init_net, in6, NULL, 0); + return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0); } /* This function checks if the address is a valid address to be used for @@ -859,14 +859,14 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { + struct net *net; if (!addr->v6.sin6_scope_id) return 0; + net = sock_net(&opt->inet.sk); rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, - addr->v6.sin6_scope_id); + dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); if (!dev || - !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, - dev, 0)) { + !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { rcu_read_unlock(); return 0; } @@ -899,7 +899,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (!addr->v6.sin6_scope_id) return 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, + dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk), addr->v6.sin6_scope_id); rcu_read_unlock(); if (!dev) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index de7994edb4ca..059c914c09f2 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -367,7 +367,8 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, /* Should this be available for binding? */ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); + struct net *net = sock_net(&sp->inet.sk); + int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr); if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && @@ -454,7 +455,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl4->daddr, &fl4->saddr); - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) dst = &rt->dst; @@ -500,7 +501,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, (AF_INET == laddr->a.sa.sa_family)) { fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; @@ -1033,6 +1034,7 @@ static const struct net_protocol sctp_protocol = { .handler = sctp_rcv, .err_handler = sctp_v4_err, .no_policy = 1, + .netns_ok = 1, }; /* IPv4 address related functions. */ -- cgit v1.2.1 From b01a24078fa3fc4f0f447d1306ce5adc495ead86 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:47:55 +0000 Subject: sctp: Make the mib per network namespace Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 2 +- net/sctp/endpointola.c | 2 +- net/sctp/input.c | 22 +++---- net/sctp/ipv6.c | 4 +- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 18 +++--- net/sctp/proc.c | 5 +- net/sctp/protocol.c | 27 ++++---- net/sctp/sm_statefuns.c | 163 +++++++++++++++++++++++++++++------------------- net/sctp/ulpqueue.c | 18 ++++-- 11 files changed, 151 insertions(+), 114 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ed4930b31341..8a1f27a7a001 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1150,7 +1150,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) if (sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 6c8556459a75..7c2df9c33df3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i=0, len=first_len; i < whole; i++) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 6b763939345b..3edca80ab3a1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -478,7 +478,7 @@ normal: if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; diff --git a/net/sctp/input.c b/net/sctp/input.c index c9a0449bde53..530830151f04 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -83,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ -static inline int sctp_rcv_checksum(struct sk_buff *skb) +static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; @@ -99,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS); + SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -137,7 +137,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->pkt_type!=PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS); + SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -149,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->len < sizeof(struct sctphdr)) goto discard_it; if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && - sctp_rcv_checksum(skb) < 0) + sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); @@ -220,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -276,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -293,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -543,7 +543,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -593,7 +593,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct net *net = dev_net(skb->dev); if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -607,7 +607,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a18cda60e156..ea14cb445295 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -169,7 +169,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); goto out; } @@ -243,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) __func__, skb, skb->len, &fl6.saddr, &fl6.daddr); - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..0c6359bb973c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -597,7 +597,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e7aa177c9522..072bf6ae3c26 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,6 +299,7 @@ void sctp_outq_free(struct sctp_outq *q) /* Put a new chunk in an sctp_outq. */ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", @@ -337,15 +338,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else - SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); q->empty = 0; break; } } else { list_add_tail(&chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } if (error < 0) @@ -478,11 +479,12 @@ void sctp_retransmit_mark(struct sctp_outq *q, void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; switch(reason) { case SCTP_RTXR_T3_RTX: - SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -493,15 +495,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, transport->asoc->unack_data; break; case SCTP_RTXR_FAST_RTX: - SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: - SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS); break; case SCTP_RTXR_T1_RTX: - SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS); transport->asoc->init_retries++; break; default: @@ -1914,6 +1916,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index dc79a3aa0382..3e62ee55228f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -83,12 +83,9 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; int i; - if (!net_eq(net, &init_net)) - return 0; - for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)sctp_statistics, + snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 059c914c09f2..d58db315db85 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -69,7 +69,6 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -961,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); return ip_queue_xmit(skb, &transport->fl); } @@ -1102,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) return 1; } -static inline int init_sctp_mibs(void) +static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)sctp_statistics, + return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, sizeof(struct sctp_mib), __alignof__(struct sctp_mib)); } -static inline void cleanup_sctp_mibs(void) +static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)sctp_statistics); + snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) @@ -1170,6 +1169,11 @@ static int sctp_net_init(struct net *net) { int status; + /* Allocate and initialise sctp mibs. */ + status = init_sctp_mibs(net); + if (status) + goto err_init_mibs; + /* Initialize proc fs directory. */ status = sctp_proc_init(net); if (status) @@ -1202,6 +1206,8 @@ err_ctl_sock_init: sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); err_init_proc: + cleanup_sctp_mibs(net); +err_init_mibs: return status; } @@ -1217,6 +1223,7 @@ static void sctp_net_exit(struct net *net) sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); + cleanup_sctp_mibs(net); } static struct pernet_operations sctp_net_ops = { @@ -1254,11 +1261,6 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - /* Allocate and initialise sctp mibs. */ - status = init_sctp_mibs(); - if (status) - goto err_init_mibs; - status = percpu_counter_init(&sctp_sockets_allocated, 0); if (status) goto err_percpu_counter_init; @@ -1474,8 +1476,6 @@ err_ehash_alloc: err_ahash_alloc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: - cleanup_sctp_mibs(); -err_init_mibs: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); @@ -1514,7 +1514,6 @@ SCTP_STATIC __exit void sctp_exit(void) sizeof(struct sctp_bind_hashbucket))); percpu_counter_destroy(&sctp_sockets_allocated); - cleanup_sctp_mibs(); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f2daf615e8fe..bee5e2c288d8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -213,6 +213,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -260,8 +261,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -322,7 +324,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, */ net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -369,7 +371,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); return SCTP_DISPOSITION_CONSUME; } else { return SCTP_DISPOSITION_NOMEM; @@ -540,7 +542,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } @@ -559,7 +561,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_ABORTEDS); return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -656,7 +658,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, */ net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -809,8 +811,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (new_asoc->autoclose) @@ -868,6 +870,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -897,8 +900,9 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, @@ -972,13 +976,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, struct sctp_transport *transport = (struct sctp_transport *) arg; if (asoc->overall_error_count >= asoc->max_retrans) { + struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -1213,7 +1219,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, goto out; sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* Discard the rest of the inbound packet. */ sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); @@ -1427,7 +1433,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; @@ -1791,7 +1797,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(sock_net(new_asoc->base.sk), SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1883,7 +1889,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -2417,6 +2423,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; unsigned int len; __be16 error = SCTP_ERROR_NO_ERROR; + struct net *net; /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); @@ -2433,8 +2440,9 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -2521,7 +2529,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_ABORTEDS); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); @@ -2904,11 +2912,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3197,7 +3205,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_sf_pdiscard(ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; @@ -3260,6 +3268,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -3299,8 +3308,9 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); /* ...and remove all record of the association. */ @@ -3346,8 +3356,10 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, __u8 *ch_end; int ootb_shut_ack = 0; int ootb_cookie_ack = 0; + struct net *net; - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { @@ -3461,7 +3473,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* If the chunk length is invalid, we don't want to process * the reset of the packet. @@ -3508,7 +3520,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTOFBLUES); return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); } @@ -3699,6 +3711,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, */ if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) && !(asoc->addip_last_asconf)) { + struct net *net; abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); if (abort) { @@ -3716,12 +3729,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) { + struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); @@ -3750,8 +3765,9 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -4222,7 +4238,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4315,7 +4331,7 @@ static sctp_disposition_t sctp_sf_abort_violation( } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4329,7 +4345,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); } } else { packet = sctp_ootb_pkt_new(net, asoc, chunk); @@ -4347,10 +4363,10 @@ static sctp_disposition_t sctp_sf_abort_violation( sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); @@ -4410,6 +4426,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( struct sctp_chunk *chunk = arg; struct sctp_paramhdr *param = ext; struct sctp_chunk *abort = NULL; + struct net *net; if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) goto discard; @@ -4419,15 +4436,16 @@ static sctp_disposition_t sctp_sf_violation_paramlen( if (!abort) goto nomem; + net = sock_net(asoc->base.sk); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); @@ -4757,6 +4775,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( */ struct sctp_chunk *abort = arg; sctp_disposition_t retval; + struct net *net; retval = SCTP_DISPOSITION_CONSUME; @@ -4772,8 +4791,9 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return retval; } @@ -4824,13 +4844,15 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(asoc->base.sk); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -4886,6 +4908,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( { struct sctp_chunk *abort = arg; sctp_disposition_t retval; + struct net *net = sock_net(asoc->base.sk); /* Stop T1-init timer */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4897,7 +4920,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -5318,8 +5341,9 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_transport *transport = arg; + struct net *net = sock_net(asoc->base.sk); - SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { @@ -5340,8 +5364,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } } @@ -5403,7 +5427,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); + struct net *net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -5436,9 +5461,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *repl = NULL; struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -5496,9 +5522,10 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep { struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -5543,9 +5570,10 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5555,8 +5583,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, /* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -5613,8 +5641,9 @@ sctp_disposition_t sctp_sf_t4_timer_expire( { struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + struct net *net = sock_net(asoc->base.sk); - SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS); /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in @@ -5639,8 +5668,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5682,9 +5711,10 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -5696,8 +5726,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; nomem: @@ -5716,9 +5746,10 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(asoc->base.sk); int disposition; - SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper @@ -5976,7 +6007,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, sctp_packet_append_chunk(packet, err_chunk); sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } else sctp_chunk_free (err_chunk); } @@ -5996,6 +6027,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + struct net *net; u16 ssn; u16 sid; u8 ordered = 0; @@ -6112,6 +6144,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * No User Data: This error cause is returned to the originator of a * DATA chunk if a received DATA chunk has no user data. */ + net = sock_net(sk); if (unlikely(0 == datalen)) { err = sctp_make_abort_no_data(asoc, chunk, tsn); if (err) { @@ -6126,8 +6159,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_DATA)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_IERROR_NO_DATA; } @@ -6137,9 +6170,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * if we renege and the chunk arrives again. */ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS); else { - SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); ordered = 1; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f5a6a4f4faf7..360d8697b95c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, + struct sk_buff_head *queue, struct sk_buff *f_frag, + struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; @@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu } event = sctp_skb2event(f_frag); - SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS); + SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } @@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(&ulpq->reasm, + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, pd_first, pd_last); if (retval) @@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul done: return retval; found: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); return retval; } -- cgit v1.2.1 From ebb7e95d9351f77a8ec1fca20eb645051401b7b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:23:59 +0000 Subject: sctp: Add infrastructure for per net sysctls Start with an empty sctp_net_table that will be populated as the various tunable sysctls are made per net. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 7 +++++++ net/sctp/sysctl.c | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d58db315db85..0f2342be61f3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1169,6 +1169,10 @@ static int sctp_net_init(struct net *net) { int status; + status = sctp_sysctl_net_register(net); + if (status) + goto err_sysctl_register; + /* Allocate and initialise sctp mibs. */ status = init_sctp_mibs(net); if (status) @@ -1208,6 +1212,8 @@ err_ctl_sock_init: err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: + sctp_sysctl_net_unregister(net); +err_sysctl_register: return status; } @@ -1224,6 +1230,7 @@ static void sctp_net_exit(struct net *net) sctp_proc_exit(net); cleanup_sctp_mibs(net); + sctp_sysctl_net_unregister(net); } static struct pernet_operations sctp_net_ops = { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2b2bfe933ff1..bee36c408dde 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -284,6 +284,27 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; +static ctl_table sctp_net_table[] = { + { /* sentinel */ } +}; + +int sctp_sysctl_net_register(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + return 0; +} + +void sctp_sysctl_net_unregister(struct net *net) +{ + unregister_net_sysctl_table(net->sctp.sysctl_header); +} + static struct ctl_table_header * sctp_sysctl_header; /* Sysctl registration. */ -- cgit v1.2.1 From 55e26eb95a5345a5796babac98de6d6c42771df1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:25:24 +0000 Subject: sctp: Push struct net down to sctp_chunk_event_lookup This trickles up through sctp_sm_lookup_event up to sctp_do_sm and up further into sctp_primitiv_NAME before the code reaches places where struct net can be reliably found. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/associola.c | 5 +++-- net/sctp/endpointola.c | 4 +++- net/sctp/input.c | 4 +++- net/sctp/primitive.c | 4 ++-- net/sctp/sm_sideeffect.c | 24 ++++++++++++++++-------- net/sctp/sm_statetable.c | 11 +++++++---- net/sctp/socket.c | 27 +++++++++++++++++---------- 7 files changed, 51 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8a1f27a7a001..6bcbecafe393 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1118,6 +1118,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) struct sctp_association *asoc = container_of(work, struct sctp_association, base.inqueue.immediate); + struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -1150,13 +1151,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) if (sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); /* Check to see if the association is freed in response to diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3edca80ab3a1..8315792ef2ba 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -413,6 +413,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; + struct net *net; struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -427,6 +428,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) asoc = NULL; inqueue = &ep->base.inqueue; sk = ep->base.sk; + net = sock_net(sk); while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); @@ -483,7 +485,7 @@ normal: if (chunk->transport) chunk->transport->last_time_heard = jiffies; - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); if (error && chunk) diff --git a/net/sctp/input.c b/net/sctp/input.c index 530830151f04..a2ceb70ee06c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -466,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } } else { + struct net *net = sock_net(sk); + if (timer_pending(&t->proto_unreach_timer) && del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, t, GFP_ATOMIC); diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index 534c7eae9d15..794bb14decde 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -57,7 +57,7 @@ #define DECLARE_PRIMITIVE(name) \ /* This is called in the code as sctp_primitive_ ## name. */ \ -int sctp_primitive_ ## name(struct sctp_association *asoc, \ +int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \ void *arg) { \ int error = 0; \ sctp_event_t event_type; sctp_subtype_t subtype; \ @@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \ state = asoc ? asoc->state : SCTP_STATE_CLOSED; \ ep = asoc ? asoc->ep : NULL; \ \ - error = sctp_do_sm(event_type, subtype, state, ep, asoc, \ + error = sctp_do_sm(net, event_type, subtype, state, ep, asoc, \ arg, GFP_KERNEL); \ return error; \ } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fe99628e1257..02c4c1c066a7 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); /* Check whether a task is in the sock. */ @@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) goto out_unlock; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX), asoc->state, asoc->ep, asoc, @@ -291,6 +292,7 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { + struct net *net = sock_net(asoc->base.sk); int error = 0; sctp_bh_lock_sock(asoc->base.sk); @@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, goto out_unlock; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(timeout_type), asoc->state, asoc->ep, asoc, (void *)timeout_type, GFP_ATOMIC); @@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data) if (transport->dead) goto out_unlock; - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) if (asoc->base.dead) goto out_unlock; - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -753,8 +757,10 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, int err = 0; if (sctp_outq_sack(&asoc->outqueue, sackh)) { + struct net *net = sock_net(asoc->base.sk); + /* There are no more TSNs awaiting SACK. */ - err = sctp_do_sm(SCTP_EVENT_T_OTHER, + err = sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); @@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, */ static void sctp_cmd_send_asconf(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); + /* Send the next asconf chunk from the addip chunk * queue. */ @@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) + if (sctp_primitive_ASCONF(net, asoc, asconf)) sctp_chunk_free(asconf); else asoc->addip_last_asconf = asconf; @@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * If you want to understand all of lksctp, this is a * good place to start. */ -int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, +int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, struct sctp_association *asoc, @@ -1110,7 +1118,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* Look up the state function, run it, and then process the * side effects. These three steps are the heart of lksctp. */ - state_fn = sctp_sm_lookup_event(event_type, state, subtype); + state_fn = sctp_sm_lookup_event(net, event_type, state, subtype); sctp_init_cmd_seq(&commands); diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 7c211a7f90f4..4a029d798287 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state); @@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = { rtn; \ }) -const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, sctp_state_t state, sctp_subtype_t event_subtype) { switch (event_type) { case SCTP_EVENT_T_CHUNK: - return sctp_chunk_event_lookup(event_subtype.chunk, state); + return sctp_chunk_event_lookup(net, event_subtype.chunk, state); case SCTP_EVENT_T_TIMEOUT: return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, timeout_event_table); @@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state) { if (state > SCTP_STATE_MAX) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5b6dd0e3d1f6..a6a4226a922f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { + struct net *net = sock_net(asoc->base.sk); int retval = 0; /* If there is an outstanding ASCONF chunk, queue it for later @@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc, /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(chunk); - retval = sctp_primitive_ASCONF(asoc, chunk); + retval = sctp_primitive_ASCONF(net, asoc, chunk); if (retval) sctp_chunk_free(chunk); else @@ -1050,6 +1051,7 @@ static int __sctp_connect(struct sock* sk, int addrs_size, sctp_assoc_t *assoc_id) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; @@ -1200,7 +1202,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) { goto out_free; } @@ -1458,6 +1460,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, */ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; @@ -1499,9 +1502,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) chunk = sctp_make_abort_user(asoc, NULL, 0); if (chunk) - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } /* On a TCP-style socket, block for at most linger_time if set. */ @@ -1569,6 +1572,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t msg_len) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *new_asoc=NULL, *asoc=NULL; @@ -1714,7 +1718,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (sinfo_flags & SCTP_EOF) { SCTP_DEBUG_PRINTK("Shutting down association: %p\n", asoc); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; } @@ -1727,7 +1731,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; } @@ -1900,7 +1904,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Auto-connect, if we aren't connected already. */ if (sctp_state(asoc, CLOSED)) { - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; SCTP_DEBUG_PRINTK("We associated primitively.\n"); @@ -1928,7 +1932,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, * works that way today. Keep it that way or this * breaks. */ - err = sctp_primitive_SEND(asoc, datamsg); + err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ if (err) sctp_datamsg_free(datamsg); @@ -2320,7 +2324,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, int error; if (params->spp_flags & SPP_HB_DEMAND && trans) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + struct net *net = sock_net(trans->asoc->base.sk); + + error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans); if (error) return error; } @@ -4011,6 +4017,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) */ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -4022,7 +4029,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) if (!list_empty(&ep->asocs)) { asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } } -- cgit v1.2.1 From 89bf3450cb9b041b1bb4bcc5e7cbdeab4545b1c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:26:14 +0000 Subject: sctp: Push struct net down into sctp_transport_init Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/associola.c | 3 ++- net/sctp/sm_statefuns.c | 2 +- net/sctp/transport.c | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6bcbecafe393..93a4513c85e0 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -641,6 +641,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const gfp_t gfp, const int peer_state) { + struct net *net = sock_net(asoc->base.sk); struct sctp_transport *peer; struct sctp_sock *sp; unsigned short port; @@ -674,7 +675,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return peer; } - peer = sctp_transport_new(addr, gfp); + peer = sctp_transport_new(net, addr, gfp); if (!peer) return NULL; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index bee5e2c288d8..ff2530c848b0 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5958,7 +5958,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, } /* Make a transport for the bucket, Eliza... */ - transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC); + transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC); if (!transport) goto nomem; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c97472b248a2..aada963c9d6b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -59,7 +59,8 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, +static struct sctp_transport *sctp_transport_init(struct net *net, + struct sctp_transport *peer, const union sctp_addr *addr, gfp_t gfp) { @@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, } /* Allocate and initialize a new transport. */ -struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, +struct sctp_transport *sctp_transport_new(struct net *net, + const union sctp_addr *addr, gfp_t gfp) { struct sctp_transport *transport; @@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, if (!transport) goto fail; - if (!sctp_transport_init(transport, addr, gfp)) + if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; transport->malloced = 1; -- cgit v1.2.1 From e7ff4a7037e6908b7a5f4682945a0b097d5b3535 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:27:02 +0000 Subject: sctp: Push struct net down into sctp_in_scope struct net will be needed shortly when the tunables are made per network namespace. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/bind_addr.c | 4 ++-- net/sctp/protocol.c | 2 +- net/sctp/sm_make_chunk.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index a85ce4b3e574..23389ba44e39 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, if (sctp_is_any(NULL, addr)) { error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags); - } else if (sctp_in_scope(addr, scope)) { + } else if (sctp_in_scope(net, addr, scope)) { /* Now that the address is in scope, check to see if * the address type is supported by local sock as * well as the remote peer. @@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr) } /* Is 'addr' valid for 'scope'? */ -int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) +int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope) { sctp_scope_t addr_scope = sctp_scope(addr); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0f2342be61f3..59965bdea07a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -210,7 +210,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; - if (sctp_in_scope(&addr->a, scope)) { + if (sctp_in_scope(net, &addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local * sock as well as the remote peer. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 479a70ef6ff8..fb12835e95c2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2466,6 +2466,7 @@ static int sctp_process_param(struct sctp_association *asoc, const union sctp_addr *peer_addr, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_addr addr; int i; __u16 sat; @@ -2494,7 +2495,7 @@ do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); - if (sctp_in_scope(&addr, scope)) + if (sctp_in_scope(net, &addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; -- cgit v1.2.1 From 24cb81a6a91288fcba19548944729ea906eb5e2a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:28:09 +0000 Subject: sctp: Push struct net down into all of the state machine functions There are a handle of state machine functions primarily those dealing with processing INIT packets where there is neither a valid endpoint nor a valid assoication from which to derive a struct net. Therefore add struct net * to the parameter list of sctp_state_fn_t and update all of the state machine functions. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 2 +- net/sctp/sm_statefuns.c | 619 ++++++++++++++++++++++++++--------------------- 2 files changed, 339 insertions(+), 282 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 02c4c1c066a7..bcfebb91559d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1123,7 +1123,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); DEBUG_PRE; - status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands); + status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); DEBUG_POST; error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ff2530c848b0..19f3bff84193 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -66,7 +66,8 @@ #include #include -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -77,34 +78,40 @@ static int sctp_eat_data(const struct sctp_association *asoc, static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, const struct sctp_association *asoc, const struct sctp_chunk *chunk); -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, struct sctp_chunk *err_chunk); -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport); static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -113,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation( const size_t paylen); static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -120,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -127,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -134,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk); -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -205,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_4_C(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -213,10 +227,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; - struct net *net; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 2960 6.10 Bundling * @@ -224,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* RFC 2960 10.2 SCTP-to-ULP @@ -261,7 +274,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); @@ -292,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -304,7 +317,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, struct sctp_chunk *err_chunk; struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; - struct net *net; int len; /* 6.10 Bundling @@ -317,22 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * Normally, this would cause an ABORT with a Protocol Violation @@ -340,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * just discard the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being @@ -349,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * can treat this OOTB */ if (sctp_sstate(ep->base.sk, CLOSING)) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; @@ -360,7 +371,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -377,7 +388,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_NOMEM; } } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -489,7 +500,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -501,18 +513,18 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; @@ -531,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * the association. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -542,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } @@ -559,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_ABORTEDS); - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -638,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -651,15 +664,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, int error = 0; struct sctp_chunk *err_chk_p; struct sock *sk; - struct net *net; /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* Make sure that the COOKIE_ECHO chunk has a valid length. @@ -668,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -677,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sk = ep->base.sk; if (!sctp_sstate(sk, LISTENING) || (sctp_style(sk, TCP) && sk_acceptq_is_full(sk))) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we * are in good shape. @@ -710,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -763,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); auth.transport = chunk->transport; - ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); + ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); /* We can now safely free the auth_chunk clone */ kfree_skb(chunk->auth_chunk); if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -863,23 +874,23 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; - struct net *net; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Reset init error count upon receipt of COOKIE-ACK, @@ -900,7 +911,6 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - net = sock_net(ep->base.sk); SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -967,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, } /* Generate a HEARTBEAT packet on the given transport. */ -sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -976,13 +987,11 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, struct sctp_transport *transport = (struct sctp_transport *) arg; if (asoc->overall_error_count >= asoc->max_retrans) { - struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - net = sock_net(ep->base.sk); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; @@ -1039,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_beat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1050,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, size_t paylen = 0; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 8.3 The receiver of the HEARTBEAT should immediately @@ -1106,7 +1116,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1119,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, unsigned long max_interval; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + sizeof(sctp_sender_hb_info_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; @@ -1213,7 +1224,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. */ - pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len); + pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len); if (!pkt) goto out; @@ -1370,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, * chunk handling. */ static sctp_disposition_t sctp_sf_do_unexpected_init( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -1394,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * In this case, we generate a protocol violation since we have * an association established. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; @@ -1424,7 +1436,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -1433,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; } goto cleanup; } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -1582,7 +1594,8 @@ cleanup: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1591,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } /* @@ -1635,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1644,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } @@ -1657,19 +1671,19 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, * An unexpected INIT ACK usually indicates the processing of an old or * duplicated INIT chunk. */ -sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - struct net *net = sock_net(ep->base.sk); /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. */ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) - return sctp_sf_ootb(ep, asoc, type, arg, commands); + return sctp_sf_ootb(net, ep, asoc, type, arg, commands); else - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); } /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') @@ -1677,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, * Section 5.2.4 * A) In this case, the peer may have restarted. */ -static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1713,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(ep, asoc, + disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, SCTP_ST_CHUNK(chunk->chunk_hdr->type), chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) @@ -1776,7 +1791,8 @@ nomem: * after responding to the local endpoint's INIT */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1797,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(sock_net(new_asoc->base.sk), SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1846,7 +1862,8 @@ nomem: * but a new tag of its own. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1867,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, * enter the ESTABLISHED state, if it has not already done so. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1889,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -1961,7 +1979,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1980,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, * done later. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we @@ -2014,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -2030,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, switch (action) { case 'A': /* Association restart. */ - retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands, new_asoc); break; case 'B': /* Collision case B. */ - retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands, new_asoc); break; case 'C': /* Collision case C. */ - retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands, new_asoc); break; case 'D': /* Collision case D. */ - retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands, new_asoc); break; default: /* Discard packet for all others. */ - retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands); + retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; } @@ -2076,6 +2095,7 @@ nomem: * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_pending_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2085,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2098,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2107,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2117,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2126,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2139,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2148,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -2158,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2167,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2176,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands); } /* @@ -2193,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2203,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Process the error here */ @@ -2219,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, */ sctp_walk_errors(err, chunk->chunk_hdr) { if (SCTP_ERROR_STALE_COOKIE == err->cause) - return sctp_sf_do_5_2_6_stale(ep, asoc, type, + return sctp_sf_do_5_2_6_stale(net, ep, asoc, type, arg, commands); } @@ -2228,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * we are discarding the packet, there should be no adverse * affects. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* @@ -2256,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2378,7 +2402,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2387,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2400,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2409,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2423,7 +2449,6 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; unsigned int len; __be16 error = SCTP_ERROR_NO_ERROR; - struct net *net; /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); @@ -2432,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_errhdr_t *err; sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); error = ((sctp_errhdr_t *)chunk->skb->data)->cause; } @@ -2440,7 +2465,6 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); - net = sock_net(ep->base.sk); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); @@ -2452,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * * See sctp_sf_do_9_1_abort() above. */ -sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2463,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2476,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((sctp_errhdr_t *)chunk->skb->data)->cause; - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } /* * Process an incoming ICMP as an ABORT. (COOKIE-WAIT state) */ -sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, + return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR, ENOPROTOOPT, asoc, (struct sctp_transport *)arg); } @@ -2504,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep /* * Process an ABORT. (COOKIE-ECHOED state) */ -sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2513,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands); } /* @@ -2521,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, * * This is common code called by several sctp_sf_*_abort() functions above. */ -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport) @@ -2529,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); @@ -2572,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2585,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Convert the elaborate header. */ @@ -2610,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to @@ -2634,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type, + disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type, arg, commands); } @@ -2660,7 +2689,8 @@ out: * The Cumulative TSN Ack of the received SHUTDOWN chunk * MUST be processed. */ -sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2671,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sdh = (sctp_shutdownhdr_t *)chunk->skb->data; @@ -2693,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* verify, by checking the Cumulative TSN Ack field of the * chunk, that all its outstanding DATA chunks have been @@ -2712,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2723,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Since we are not going to really process this INIT, there @@ -2775,7 +2806,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2786,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); cwr = (sctp_cwrhdr_t *) chunk->skb->data; @@ -2830,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecne(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2840,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ecne = (sctp_ecnehdr_t *) chunk->skb->data; @@ -2886,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2899,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -2912,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -3007,7 +3041,8 @@ consume: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3019,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -3037,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -3097,7 +3132,8 @@ consume: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3108,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Pull the SACK chunk from the data buffer */ sackh = sctp_sm_pull_sack(chunk); /* Was this a bogus SACK? */ if (!sackh) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); chunk->subh.sack_hdr = sackh; ctsn = ntohl(sackh->cum_tsn_ack); @@ -3140,7 +3176,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* Return this SACK for further processing. */ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh)); @@ -3169,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3178,9 +3215,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort; - struct net *net; - net = sock_net(ep->base.sk); packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { @@ -3207,7 +3242,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; } @@ -3222,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_operr_notify(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3232,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err, commands); sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, @@ -3259,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3268,14 +3305,13 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; struct sctp_ulpevent *ev; - struct net *net; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 10.2 H) SHUTDOWN COMPLETE notification * @@ -3308,7 +3344,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); @@ -3343,7 +3378,8 @@ nomem: * receiver of the OOTB packet shall discard the OOTB packet and take * no further action. */ -sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ootb(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3356,16 +3392,14 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, __u8 *ch_end; int ootb_shut_ack = 0; int ootb_cookie_ack = 0; - struct net *net; - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Now that we know we at least have a chunk header, @@ -3380,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * sending an ABORT of its own. */ if (SCTP_CID_ABORT == ch->type) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR * or a COOKIE ACK the SCTP Packet should be silently @@ -3402,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, /* Report violation if chunk len overflows */ ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) - return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); else if (ootb_cookie_ack) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); else - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* @@ -3437,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3446,9 +3481,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *shut; - struct net *net; - net = sock_net(ep->base.sk); packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { @@ -3479,13 +3512,13 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, * the reset of the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* We need to discard the rest of the packet to prevent * potential bomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } return SCTP_DISPOSITION_NOMEM; @@ -3502,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, * chunks. --piggy ] * */ -sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3512,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Although we do have an association in this case, it corresponds @@ -3520,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ - SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands); } /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */ -sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3542,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP: Section 4.1.1 @@ -3552,11 +3587,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!sctp_addip_noauth && !chunk->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hdr = (sctp_addiphdr_t *)chunk->skb->data; @@ -3565,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3573,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value @@ -3653,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * When building TLV parameters for the ASCONF Chunk that will add or * delete IP addresses the D0 to D13 rules should be applied: */ -sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3668,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(asconf_ack, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP, Section 4.1.2: @@ -3678,11 +3714,11 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!sctp_addip_noauth && !asconf_ack->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; @@ -3693,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); if (last_asconf) { @@ -3711,7 +3747,6 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, */ if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) && !(asoc->addip_last_asconf)) { - struct net *net; abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); if (abort) { @@ -3729,14 +3764,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) { - struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); @@ -3765,7 +3798,6 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; @@ -3788,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3803,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3855,6 +3888,7 @@ discard_noforce: } sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -3870,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3942,7 +3976,8 @@ gen_shutdown: * * The return value is the disposition of the chunk. */ -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk) @@ -4015,7 +4050,8 @@ nomem: return SCTP_IERROR_NOMEM; } -sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_auth(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4028,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Make sure that the peer has AUTH capable */ if (!asoc->peer.auth_capable) - return sctp_sf_unk_chunk(ep, asoc, type, arg, commands); + return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the AUTH chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); auth_hdr = (struct sctp_authhdr *)chunk->skb->data; - error = sctp_sf_authenticate(ep, asoc, type, chunk); + error = sctp_sf_authenticate(net, ep, asoc, type, chunk); switch (error) { case SCTP_IERROR_AUTH_BAD_HMAC: /* Generate the ERROR chunk and discard the rest @@ -4059,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Fall Through */ case SCTP_IERROR_AUTH_BAD_KEYID: case SCTP_IERROR_BAD_SIG: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); case SCTP_IERROR_NOMEM: @@ -4111,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_unk_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4124,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); switch (type.chunk & SCTP_CID_ACTION_MASK) { case SCTP_CID_ACTION_DISCARD: /* Discard the packet. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; case SCTP_CID_ACTION_DISCARD_ERR: /* Generate an ERROR chunk as response. */ @@ -4152,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, } /* Discard the packet. */ - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; break; case SCTP_CID_ACTION_SKIP: @@ -4194,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_discard_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4207,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); @@ -4232,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_pdiscard(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4259,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, * We simply tag the chunk as a violation. The state machine will log * the violation and continue. */ -sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_violation(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4269,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_VIOLATION; @@ -4279,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, * Common function to handle a protocol violation. */ static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -4289,7 +4330,6 @@ static sctp_disposition_t sctp_sf_abort_violation( struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort = NULL; - struct net *net; /* SCTP-AUTH, Section 6.3: * It should be noted that if the receiver wants to tear @@ -4310,7 +4350,6 @@ static sctp_disposition_t sctp_sf_abort_violation( if (!abort) goto nomem; - net = sock_net(ep->base.sk); if (asoc) { /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && @@ -4369,7 +4408,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem_pkt: @@ -4398,6 +4437,7 @@ nomem: * Generate an ABORT chunk and terminate the association. */ static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4406,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( { static const char err_str[]="The following chunk had invalid length:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4417,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( * the length is considered as invalid. */ static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4426,7 +4467,6 @@ static sctp_disposition_t sctp_sf_violation_paramlen( struct sctp_chunk *chunk = arg; struct sctp_paramhdr *param = ext; struct sctp_chunk *abort = NULL; - struct net *net; if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) goto discard; @@ -4436,7 +4476,6 @@ static sctp_disposition_t sctp_sf_violation_paramlen( if (!abort) goto nomem; - net = sock_net(asoc->base.sk); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); @@ -4448,7 +4487,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem: return SCTP_DISPOSITION_NOMEM; @@ -4461,6 +4500,7 @@ nomem: * error code. */ static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4469,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( { static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4480,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( * on the path and we may not want to continue this communication. */ static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4489,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk( static const char err_str[]="The following chunk violates protocol:"; if (!asoc) - return sctp_sf_violation(ep, asoc, type, arg, commands); + return sctp_sf_violation(net, ep, asoc, type, arg, commands); - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } /*************************************************************************** @@ -4554,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk( * * The return value is a disposition. */ -sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4665,7 +4707,8 @@ nomem: * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_send(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4704,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4725,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -4759,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_1_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4775,7 +4820,6 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( */ struct sctp_chunk *abort = arg; sctp_disposition_t retval; - struct net *net; retval = SCTP_DISPOSITION_CONSUME; @@ -4791,7 +4835,6 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); @@ -4799,7 +4842,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( } /* We tried an illegal operation on an association which is closed. */ -sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_closed(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4812,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, /* We tried an illegal operation on an association which is shutting * down. */ -sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4838,14 +4883,13 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - struct net *net = sock_net(asoc->base.sk); - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); @@ -4874,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4882,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands); } /* @@ -4900,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4908,7 +4954,6 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( { struct sctp_chunk *abort = arg; sctp_disposition_t retval; - struct net *net = sock_net(asoc->base.sk); /* Stop T1-init timer */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4950,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4959,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4975,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4985,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -5001,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5015,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -5031,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5040,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -5066,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( * association on which a heartbeat should be issued. */ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5097,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( * When an endpoint has an ASCONF signaled change to be sent to the * remote endpoint it should do A1 to A9 */ -sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5118,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, * The return value is the disposition of the primitive. */ sctp_disposition_t sctp_sf_ignore_primitive( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5139,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive( * retransmit, the stack will immediately send up this notification. */ sctp_disposition_t sctp_sf_do_no_pending_tsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5170,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5239,6 +5293,7 @@ nomem: * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5257,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( */ if (chunk) { if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); } @@ -5309,7 +5364,8 @@ nomem: * * The return value is the disposition of the event. */ -sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ignore_other(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5334,14 +5390,14 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_transport *transport = arg; - struct net *net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); @@ -5421,13 +5477,13 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, * allow. However, an SCTP transmitter MUST NOT be more aggressive than * the following algorithms allow. */ -sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - struct net *net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; @@ -5452,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5461,7 +5518,6 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *repl = NULL; struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; - struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); @@ -5514,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5522,7 +5579,6 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep { struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; - struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); @@ -5563,14 +5619,14 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep * the T2-Shutdown timer, giving its peer ample opportunity to transmit * all of its queued DATA chunks that have not yet been sent. */ -sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; - struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); @@ -5633,6 +5689,7 @@ nomem: * If the T4 RTO timer expires the endpoint should do B1 to B5 */ sctp_disposition_t sctp_sf_t4_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5641,7 +5698,6 @@ sctp_disposition_t sctp_sf_t4_timer_expire( { struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; - struct net *net = sock_net(asoc->base.sk); SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS); @@ -5704,14 +5760,14 @@ sctp_disposition_t sctp_sf_t4_timer_expire( * At the expiration of this timer the sender SHOULD abort the association * by sending an ABORT chunk. */ -sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; - struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); @@ -5740,13 +5796,13 @@ nomem: * the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown(). */ sctp_disposition_t sctp_sf_autoclose_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - struct net *net = sock_net(asoc->base.sk); int disposition; SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); @@ -5764,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -5782,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_not_impl(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5799,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_bug(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5819,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_timer_ignore(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5861,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk) /* Create an ABORT packet to be sent as a response, with the specified * error causes. */ -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -5869,9 +5929,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, { struct sctp_packet *packet; struct sctp_chunk *abort; - struct net *net; - net = sock_net(ep->base.sk); packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { @@ -5984,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet) } /* Send a stale cookie error when a invalid COOKIE ECHO chunk is found */ -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -5993,7 +6052,6 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (err_chunk) { - struct net *net = sock_net(ep->base.sk); packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { struct sctp_signed_cookie *cookie; @@ -6027,7 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; - struct net *net; + struct net *net = sock_net(sk); u16 ssn; u16 sid; u8 ordered = 0; @@ -6144,7 +6202,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, * No User Data: This error cause is returned to the originator of a * DATA chunk if a received DATA chunk has no user data. */ - net = sock_net(sk); if (unlikely(0 == datalen)) { err = sctp_make_abort_no_data(asoc, chunk, tsn); if (err) { -- cgit v1.2.1 From f53b5b097e58361668b785eff9f7bcd12b4255ec Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:29:08 +0000 Subject: sctp: Push struct net down into sctp_verify_ext_param Add struct net as a parameter to sctp_verify_param so it can be passed to sctp_verify_ext_param where struct net will be needed when the sctp tunables become per net tunables. Add struct net as a parameter to sctp_verify_init so struct net can be passed to sctp_verify_param. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 11 ++++++----- net/sctp/sm_statefuns.c | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fb12835e95c2..a4b096f85a68 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1940,7 +1940,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } -static int sctp_verify_ext_param(union sctp_params param) +static int sctp_verify_ext_param(struct net *net, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int have_auth = 0; @@ -2081,7 +2081,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop processing, trigger an ERROR * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, +static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, struct sctp_chunk *chunk, @@ -2110,7 +2111,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_SUPPORTED_EXT: - if (!sctp_verify_ext_param(param)) + if (!sctp_verify_ext_param(net, param)) return SCTP_IERROR_ABORT; break; @@ -2198,7 +2199,7 @@ fallthrough: } /* Verify the INIT packet before we process it. */ -int sctp_verify_init(const struct sctp_association *asoc, +int sctp_verify_init(struct net *net, const struct sctp_association *asoc, sctp_cid_t cid, sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, @@ -2245,7 +2246,7 @@ int sctp_verify_init(const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - result = sctp_verify_param(asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 19f3bff84193..e17ada47afc4 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -364,7 +364,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -531,7 +531,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -1429,7 +1429,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. -- cgit v1.2.1 From e1fc3b14f9a90d9591016749289f2c3d7b35fbf4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:29:57 +0000 Subject: sctp: Make sysctl tunables per net Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/associola.c | 10 ++- net/sctp/auth.c | 20 ++++- net/sctp/bind_addr.c | 2 +- net/sctp/endpointola.c | 9 ++- net/sctp/input.c | 2 +- net/sctp/protocol.c | 128 ++++++++++++++++---------------- net/sctp/sm_make_chunk.c | 47 ++++++------ net/sctp/sm_statefuns.c | 4 +- net/sctp/sm_statetable.c | 6 +- net/sctp/socket.c | 65 ++++++++++------- net/sctp/sysctl.c | 185 ++++++++++++++++++++++++----------------------- net/sctp/transport.c | 15 ++-- 12 files changed, 265 insertions(+), 228 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 93a4513c85e0..b1ef3bc301a5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_scope_t scope, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_sock *sp; int i; sctp_paramhdr_t *p; @@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = sctp_pf_retrans; + asoc->pf_retrans = net->sctp.pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; + min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * and will revert old behavior. */ asoc->peer.asconf_capable = 0; - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; @@ -1418,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) /* Should we send a SACK to update our peer? */ static inline int sctp_peer_needs_update(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: @@ -1425,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, - (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) return 1; break; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index bf812048cf6f..aaa6c121ecce 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -392,13 +392,14 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!sctp_auth_enable || !asoc->peer.auth_capable) + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { + struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; /* if the transforms are already allocted, we are done */ - if (!sctp_auth_enable) { + if (!net->sctp.auth_enable) { ep->auth_hmacs = NULL; return 0; } @@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable); return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 23389ba44e39..d886b3bf84f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -512,7 +512,7 @@ int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t sco * Address scoping can be selectively controlled via sysctl * option */ - switch (sctp_scope_policy) { + switch (net->sctp.scope_policy) { case SCTP_SCOPE_POLICY_DISABLE: return 1; case SCTP_SCOPE_POLICY_ENABLE: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8315792ef2ba..1859e2bc83d1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_hmac_algo_param *auth_hmacs = NULL; struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; @@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. @@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = @@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ - ep->sndbuf_policy = sctp_sndbuf_policy; + ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ - ep->rcvbuf_policy = sctp_rcvbuf_policy; + ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ceb70ee06c..25dfe7380479 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1094,7 +1094,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, break; case SCTP_CID_ASCONF: - if (have_auth || sctp_addip_noauth) + if (have_auth || net->sctp.addip_noauth) asoc = __sctp_rcv_asconf_lookup( net, ch, laddr, sctp_hdr(skb)->source, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 59965bdea07a..2d518425d598 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1169,6 +1169,70 @@ static int sctp_net_init(struct net *net) { int status; + /* + * 14. Suggested SCTP Protocol Parameter Values + */ + /* The following protocol parameters are RECOMMENDED: */ + /* RTO.Initial - 3 seconds */ + net->sctp.rto_initial = SCTP_RTO_INITIAL; + /* RTO.Min - 1 second */ + net->sctp.rto_min = SCTP_RTO_MIN; + /* RTO.Max - 60 seconds */ + net->sctp.rto_max = SCTP_RTO_MAX; + /* RTO.Alpha - 1/8 */ + net->sctp.rto_alpha = SCTP_RTO_ALPHA; + /* RTO.Beta - 1/4 */ + net->sctp.rto_beta = SCTP_RTO_BETA; + + /* Valid.Cookie.Life - 60 seconds */ + net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; + + /* Whether Cookie Preservative is enabled(1) or not(0) */ + net->sctp.cookie_preserve_enable = 1; + + /* Max.Burst - 4 */ + net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + + /* Association.Max.Retrans - 10 attempts + * Path.Max.Retrans - 5 attempts (per destination address) + * Max.Init.Retransmits - 8 attempts + */ + net->sctp.max_retrans_association = 10; + net->sctp.max_retrans_path = 5; + net->sctp.max_retrans_init = 8; + + /* Sendbuffer growth - do per-socket accounting */ + net->sctp.sndbuf_policy = 0; + + /* Rcvbuffer growth - do per-socket accounting */ + net->sctp.rcvbuf_policy = 0; + + /* HB.interval - 30 seconds */ + net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; + + /* delayed SACK timeout */ + net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + + /* Disable ADDIP by default. */ + net->sctp.addip_enable = 0; + net->sctp.addip_noauth = 0; + net->sctp.default_auto_asconf = 0; + + /* Enable PR-SCTP by default. */ + net->sctp.prsctp_enable = 1; + + /* Disable AUTH by default. */ + net->sctp.auth_enable = 0; + + /* Set SCOPE policy to enabled */ + net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; + + /* Set the default rwnd update threshold */ + net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + + /* Initialize maximum autoclose timeout. */ + net->sctp.max_autoclose = INT_MAX / HZ; + status = sctp_sysctl_net_register(net); if (status) goto err_sysctl_register; @@ -1272,59 +1336,12 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_percpu_counter_init; - /* - * 14. Suggested SCTP Protocol Parameter Values - */ - /* The following protocol parameters are RECOMMENDED: */ - /* RTO.Initial - 3 seconds */ - sctp_rto_initial = SCTP_RTO_INITIAL; - /* RTO.Min - 1 second */ - sctp_rto_min = SCTP_RTO_MIN; - /* RTO.Max - 60 seconds */ - sctp_rto_max = SCTP_RTO_MAX; - /* RTO.Alpha - 1/8 */ - sctp_rto_alpha = SCTP_RTO_ALPHA; - /* RTO.Beta - 1/4 */ - sctp_rto_beta = SCTP_RTO_BETA; - - /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; - - /* Whether Cookie Preservative is enabled(1) or not(0) */ - sctp_cookie_preserve_enable = 1; - - /* Max.Burst - 4 */ - sctp_max_burst = SCTP_DEFAULT_MAX_BURST; - - /* Association.Max.Retrans - 10 attempts - * Path.Max.Retrans - 5 attempts (per destination address) - * Max.Init.Retransmits - 8 attempts - */ - sctp_max_retrans_association = 10; - sctp_max_retrans_path = 5; - sctp_max_retrans_init = 8; - - /* Sendbuffer growth - do per-socket accounting */ - sctp_sndbuf_policy = 0; - - /* Rcvbuffer growth - do per-socket accounting */ - sctp_rcvbuf_policy = 0; - - /* HB.interval - 30 seconds */ - sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; - - /* delayed SACK timeout */ - sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; - /* Implementation specific variables. */ /* Initialize default stream count setup information. */ sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; - /* Initialize maximum autoclose timeout. */ - sctp_max_autoclose = INT_MAX / HZ; - /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); @@ -1411,23 +1428,6 @@ SCTP_STATIC __init int sctp_init(void) pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); - /* Disable ADDIP by default. */ - sctp_addip_enable = 0; - sctp_addip_noauth = 0; - sctp_default_auto_asconf = 0; - - /* Enable PR-SCTP by default. */ - sctp_prsctp_enable = 1; - - /* Disable AUTH by default. */ - sctp_auth_enable = 0; - - /* Set SCOPE policy to enabled */ - sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; - - /* Set the default rwnd update threshold */ - sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; - sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a4b096f85a68..fbe1636309a7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { + struct net *net = sock_net(asoc->base.sk); sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: @@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_param(retval, num_ext, extensions); } - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { @@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) * only if ADD-IP is turned on and we are not backward-compatible * mode. */ - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) return 1; - if (sctp_addip_enable && !have_auth && have_asconf) + if (net->sctp.addip_enable && !have_auth && have_asconf) return 0; return 1; @@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { + struct net *net = sock_net(asoc->base.sk); __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_FWD_TSN: - if (sctp_prsctp_enable && + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; break; @@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (sctp_auth_enable) + if (net->sctp.auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (sctp_addip_enable) + if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; default: @@ -2116,7 +2118,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_SET_PRIMARY: - if (sctp_addip_enable) + if (net->sctp.addip_enable) break; goto fallthrough; @@ -2127,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) break; goto fallthrough; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2149,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2165,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2271,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, sctp_init_chunk_t *peer_init, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_params param; struct sctp_transport *transport; struct list_head *pos, *temp; @@ -2327,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * also give us an option to silently ignore the packet, which * is what we'll do here. */ - if (!sctp_addip_noauth && + if (!net->sctp.addip_noauth && (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | @@ -2502,7 +2505,7 @@ do_addr_param: break; case SCTP_PARAM_COOKIE_PRESERVATIVE: - if (!sctp_cookie_preserve_enable) + if (!net->sctp.cookie_preserve_enable) break; stale = ntohl(param.life->lifespan_increment); @@ -2582,7 +2585,7 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) goto fall_through; addr_param = param.v + sizeof(sctp_addip_param_t); @@ -2609,7 +2612,7 @@ do_addr_param: break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } @@ -2617,7 +2620,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2630,7 +2633,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2646,7 +2649,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e17ada47afc4..094813b6c3c3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3586,7 +3586,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !chunk->auth) + if (!net->sctp.addip_noauth && !chunk->auth) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ @@ -3713,7 +3713,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !asconf_ack->auth) + if (!net->sctp.addip_noauth && !asconf_ack->auth) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ADDIP chunk has a valid length. */ diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 4a029d798287..84d98d8a5a74 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -918,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { if (cid == SCTP_CID_FWD_TSN) return &prsctp_chunk_event_table[0][state]; } - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { if (cid == SCTP_CID_ASCONF) return &addip_chunk_event_table[0][state]; @@ -931,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, return &addip_chunk_event_table[1][state]; } - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { if (cid == SCTP_CID_AUTH) return &auth_chunk_event_table[0][state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a6a4226a922f..d37d24ff197f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -516,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -530,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -718,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -733,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -3039,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3048,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3285,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authchunk val; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3317,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3354,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3395,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3423,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3849,6 +3857,7 @@ out: */ SCTP_STATIC int sctp_init_sock(struct sock *sk) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_sock *sp; @@ -3878,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_timetolive = 0; sp->default_rcv_context = 0; - sp->max_burst = sctp_max_burst; + sp->max_burst = net->sctp.max_burst; /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or @@ -3886,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) */ sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; - sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = sctp_rto_max; + sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init; + sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = sctp_rto_initial; - sp->rtoinfo.srto_max = sctp_rto_max; - sp->rtoinfo.srto_min = sctp_rto_min; + sp->rtoinfo.srto_initial = net->sctp.rto_initial; + sp->rtoinfo.srto_max = net->sctp.rto_max; + sp->rtoinfo.srto_min = net->sctp.rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. */ - sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association; + sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association; sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; + sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3913,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = sctp_hb_interval; - sp->pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = net->sctp.hb_interval; + sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = sctp_sack_timeout; + sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | @@ -3967,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (sctp_default_auto_asconf) { + sock_prot_inuse_add(net, sk->sk_prot, 1); + if (net->sctp.default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sock_net(sk)->sctp.auto_asconf_splist); + &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -5307,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; hmacs = sctp_sk(sk)->ep->auth_hmacs_list; @@ -5336,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5368,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5375,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5411,6 +5423,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5418,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bee36c408dde..70e3ba5cb50b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -63,9 +63,35 @@ extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { + { + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, + { + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + + { /* sentinel */ } +}; + +static ctl_table sctp_net_table[] = { { .procname = "rto_initial", - .data = &sctp_rto_initial, + .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -74,7 +100,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_min", - .data = &sctp_rto_min, + .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -83,7 +109,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_max", - .data = &sctp_rto_max, + .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -91,17 +117,22 @@ static ctl_table sctp_table[] = { .extra2 = &timer_max }, { - .procname = "valid_cookie_life", - .data = &sctp_valid_cookie_life, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .procname = "rto_alpha_exp_divisor", + .data = &init_net.sctp.rto_alpha, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "rto_beta_exp_divisor", + .data = &init_net.sctp.rto_beta, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, { .procname = "max_burst", - .data = &sctp_max_burst, + .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -109,31 +140,42 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "association_max_retrans", - .data = &sctp_max_retrans_association, + .procname = "cookie_preserve_enable", + .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "valid_cookie_life", + .data = &init_net.sctp.valid_cookie_life, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "sndbuf_policy", - .data = &sctp_sndbuf_policy, + .procname = "sack_timeout", + .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sack_timer_min, + .extra2 = &sack_timer_max, }, { - .procname = "rcvbuf_policy", - .data = &sctp_rcvbuf_policy, - .maxlen = sizeof(int), + .procname = "hb_interval", + .data = &init_net.sctp.hb_interval, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "path_max_retrans", - .data = &sctp_max_retrans_path, + .procname = "association_max_retrans", + .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -141,17 +183,17 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "pf_retrans", - .data = &sctp_pf_retrans, + .procname = "path_max_retrans", + .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &int_max }, { .procname = "max_init_retransmits", - .data = &sctp_max_retrans_init, + .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -159,103 +201,66 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "hb_interval", - .data = &sctp_hb_interval, - .maxlen = sizeof(unsigned int), + .procname = "pf_retrans", + .data = &init_net.sctp.pf_retrans, + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .extra1 = &zero, + .extra2 = &int_max }, { - .procname = "cookie_preserve_enable", - .data = &sctp_cookie_preserve_enable, + .procname = "sndbuf_policy", + .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "rto_alpha_exp_divisor", - .data = &sctp_rto_alpha, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "rto_beta_exp_divisor", - .data = &sctp_rto_beta, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "addip_enable", - .data = &sctp_addip_enable, + .procname = "rcvbuf_policy", + .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", - .data = &sctp_default_auto_asconf, + .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "prsctp_enable", - .data = &sctp_prsctp_enable, + .procname = "addip_enable", + .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sack_timeout", - .data = &sctp_sack_timeout, + .procname = "addip_noauth_enable", + .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &sack_timer_min, - .extra2 = &sack_timer_max, - }, - { - .procname = "sctp_mem", - .data = &sysctl_sctp_mem, - .maxlen = sizeof(sysctl_sctp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "sctp_rmem", - .data = &sysctl_sctp_rmem, - .maxlen = sizeof(sysctl_sctp_rmem), - .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sctp_wmem", - .data = &sysctl_sctp_wmem, - .maxlen = sizeof(sysctl_sctp_wmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "auth_enable", - .data = &sctp_auth_enable, + .procname = "prsctp_enable", + .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "addip_noauth_enable", - .data = &sctp_addip_noauth, + .procname = "auth_enable", + .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addr_scope_policy", - .data = &sctp_scope_policy, + .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -264,7 +269,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rwnd_update_shift", - .data = &sctp_rwnd_upd_shift, + .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -273,7 +278,7 @@ static ctl_table sctp_table[] = { }, { .procname = "max_autoclose", - .data = &sctp_max_autoclose, + .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -284,18 +289,18 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { - { /* sentinel */ } -}; - int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; + int i; table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); if (!table) return -ENOMEM; + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); return 0; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index aada963c9d6b..953c21e4af97 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -77,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; @@ -87,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net, SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ - peer->pathmaxrxt = sctp_max_retrans_path; - peer->pf_retrans = sctp_pf_retrans; + peer->pathmaxrxt = net->sctp.max_retrans_path; + peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -318,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); if (tp->rttvar || tp->srtt) { + struct net *net = sock_net(tp->asoc->base.sk); /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -329,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) - + ((abs(tp->srtt - rtt)) >> sctp_rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) - + (rtt >> sctp_rto_alpha); + tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta); + tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. -- cgit v1.2.1 From c03307eab68d583ea6db917681afa14ed1fb3b84 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Aug 2012 08:19:33 -0700 Subject: bridge: fix rcu dereference outside of rcu_read_lock Alternative solution for problem found by Linux Driver Verification project (linuxtesting.org). As it noted in the comment before the br_handle_frame_finish function, this function should be called under rcu_read_lock. The problem callgraph: br_dev_xmit -> br_nf_pre_routing_finish_bridge_slow -> -> br_handle_frame_finish -> br_port_get_rcu -> rcu_dereference And in this case there is no read-lock section. Reported-by: Denis Efremov Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32211fa5b506..070e8a68cfc6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { br_nf_pre_routing_finish_bridge_slow(skb); + rcu_read_unlock(); return NETDEV_TX_OK; } #endif @@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - rcu_read_lock(); if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb); else if (is_multicast_ether_addr(dest)) { -- cgit v1.2.1 From 65e0736bc2ac314bd374e93c24dd0698ac5ee66d Mon Sep 17 00:00:00 2001 From: Fan Du Date: Wed, 15 Aug 2012 10:13:47 +0800 Subject: xfrm: remove redundant parameter "int dir" in struct xfrm_mgr.acquire Sematically speaking, xfrm_mgr.acquire is called when kernel intends to ask user space IKE daemon to negotiate SAs with peers. IOW the direction will *always* be XFRM_POLICY_OUT, so remove int dir for clarity. Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 9 ++++----- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 34e418508a67..ec7d161c129b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3024,7 +3024,7 @@ static u32 get_acqseq(void) return res; } -static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir) +static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -3105,7 +3105,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; - pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_id = xp->index; /* Set sadb_comb's. */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 87cd0e4d4282..7856c33898fa 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1700,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); + acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..ab58034c42d6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2567,8 +2567,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, } static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) + struct xfrm_tmpl *xt, struct xfrm_policy *xp) { __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; @@ -2583,7 +2582,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); - copy_to_user_policy(xp, &ua->policy, dir); + copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; @@ -2605,7 +2604,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) + struct xfrm_policy *xp) { struct net *net = xs_net(x); struct sk_buff *skb; @@ -2614,7 +2613,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp, dir) < 0) + if (build_acquire(skb, x, xt, xp) < 0) BUG(); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); -- cgit v1.2.1 From 1b3a6926fc3430116c97c333f7cf901b86779008 Mon Sep 17 00:00:00 2001 From: Razvan Ghitulete Date: Tue, 14 Aug 2012 16:30:20 +0300 Subject: net: remove wrong initialization for snd_wl1 The field tp->snd_wl1 is twice initialized, the second time seems to be wrong as it may overwrite any update in tcp_ack. Signed-off-by: Razvan Ghitulete Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fa2c2c2cac2d..c423317eb719 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5744,7 +5744,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_ECN_rcv_synack(tp, th); - tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and @@ -5757,7 +5757,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * never scaled. */ tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; -- cgit v1.2.1 From e862f1a9b7df4e8196ebec45ac62295138aa3fc2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:44 +0000 Subject: atm: fix info leak in getsockopt(SO_ATMPVC) The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/atm/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index b4b44dbed645..0c0ad930a632 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; -- cgit v1.2.1 From 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:45 +0000 Subject: atm: fix info leak via getsockname() The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/atm/pvc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 3a734919c36c..ae0324021407 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; -- cgit v1.2.1 From e15ca9a0ef9a86f0477530b0f44a725d67f889ee Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:46 +0000 Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER) The HCI code fails to initialize the two padding bytes of struct hci_ufilter before copying it to userland -- that for leaking two bytes kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de03d79..a27bbc3cd4b7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1009,6 +1009,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); -- cgit v1.2.1 From 3f68ba07b1da811bf383b4b701b129bfcb2e4988 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:47 +0000 Subject: Bluetooth: HCI - Fix info leak via getsockname() The HCI code fails to initialize the hci_channel member of struct sockaddr_hci and that for leaks two bytes kernel stack via the getsockname() syscall. Initialize hci_channel with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a27bbc3cd4b7..19fdac78e555 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; + haddr->hci_channel= 0; release_sock(sk); return 0; -- cgit v1.2.1 From 9ad2de43f1aee7e7274a4e0d41465489299e344b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:48 +0000 Subject: Bluetooth: RFCOMM - Fix info leak in getsockopt(BT_SECURITY) The RFCOMM code fails to initialize the key_size member of struct bt_security before copying it to userland -- that for leaking one byte kernel stack. Initialize key_size with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7e1e59645c05..64f55ca61472 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -822,6 +822,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c } sec.level = rfcomm_pi(sk)->sec_level; + sec.key_size = 0; len = min_t(unsigned int, len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) -- cgit v1.2.1 From f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:49 +0000 Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST) The RFCOMM code fails to initialize the two padding bytes of struct rfcomm_dev_list_req inserted for alignment before copying it to userland. Additionally there are two padding bytes in each instance of struct rfcomm_dev_info. The ioctl() that for disclosures two bytes plus dev_num times two bytes uninitialized kernel heap memory. Allocate the memory using kzalloc() to fix this issue. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cb960773c002..56f182393c4c 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - dl = kmalloc(size, GFP_KERNEL); + dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; -- cgit v1.2.1 From 9344a972961d1a6d2c04d9008b13617bcb6ec2ef Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:50 +0000 Subject: Bluetooth: RFCOMM - Fix info leak via getsockname() The RFCOMM code fails to initialize the trailing padding byte of struct sockaddr_rc added for alignment. It that for leaks one byte kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 64f55ca61472..1a17850d093c 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) -- cgit v1.2.1 From 792039c73cf176c8e39a6e8beef2c94ff46522ed Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:51 +0000 Subject: Bluetooth: L2CAP - Fix info leak via getsockname() The L2CAP code fails to initialize the l2_bdaddr_type member of struct sockaddr_l2 and the padding byte added for alignment. It that for leaks two bytes kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b94abd30e6f9..1497edd191a2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -245,6 +245,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); -- cgit v1.2.1 From 04d4fbca1017c11381e7d82acea21dd741e748bc Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:52 +0000 Subject: l2tp: fix info leak via getsockname() The L2TP code for IPv6 fails to initialize the l2tp_unused member of struct sockaddr_l2tpip6 and that for leaks two bytes kernel stack via the getsockname() syscall. Initialize l2tp_unused with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 35e1e4bde587..927547171bc7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_family = AF_INET6; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_unused = 0; if (peer) { if (!lsk->peer_conn_id) return -ENOTCONN; -- cgit v1.2.1 From 3592aaeb80290bda0f2cf0b5456c97bfc638b192 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:53 +0000 Subject: llc: fix info leak via getsockname() The LLC code wrongly returns 0, i.e. "success", when the socket is zapped. Together with the uninitialized uaddrlen pointer argument from sys_getsockname this leads to an arbitrary memory leak of up to 128 bytes kernel stack via the getsockname() syscall. Return an error instead when the socket is zapped to prevent the info leak. Also remove the unnecessary memset(0). We don't directly write to the memory pointed by uaddr but memcpy() a local structure at the end of the function that is properly initialized. Signed-off-by: Mathias Krause Cc: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8c2919ca36f6..c2190005a114 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = 0; + int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; *uaddrlen = sizeof(sllc); - memset(uaddr, 0, *uaddrlen); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) -- cgit v1.2.1 From 276bdb82dedb290511467a5a4fdbe9f0b52dce6f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:54 +0000 Subject: dccp: check ccid before dereferencing ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt() might be called with a NULL ccid pointer leading to a NULL pointer dereference. This could lead to a privilege escalation if the attacker is able to map page 0 and prepare it with a fake ccid_ops pointer. Signed-off-by: Mathias Krause Cc: Gerrit Renker Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582a7678..fb85d371a8de 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; -- cgit v1.2.1 From 7b07f8eb75aa3097cdfd4f6eac3da49db787381d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:55 +0000 Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO) The CCID3 code fails to initialize the trailing padding bytes of struct tfrc_tx_info added for alignment on 64 bit architectures. It that for potentially leaks four bytes kernel stack via the getsockopt() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d65e98798eca..119c04317d48 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; -- cgit v1.2.1 From 2d8a041b7bfe1097af21441cb77d6af95f4f4680 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:56 +0000 Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT) If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is not set, __ip_vs_get_timeouts() does not fully initialize the structure that gets copied to userland and that for leaks up to 12 bytes of kernel stack. Add an explicit memset(0) before passing the structure to __ip_vs_get_timeouts() to avoid the info leak. Signed-off-by: Mathias Krause Cc: Wensong Zhang Cc: Simon Horman Cc: Julian Anastasov Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_ctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dda194b..72bf32a84874 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2759,6 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; + memset(&t, 0, sizeof(t)); __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; -- cgit v1.2.1 From 43da5f2e0d0c69ded3d51907d9552310a6b545e8 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:57 +0000 Subject: net: fix info leak in compat dev_ifconf() The implementation of dev_ifconf() for the compat ioctl interface uses an intermediate ifc structure allocated in userland for the duration of the syscall. Though, it fails to initialize the padding bytes inserted for alignment and that for leaks four bytes of kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dfe5b66c97e0..a5471f804d99 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; -- cgit v1.2.1 From 2614f86490122bf51eb7c12ec73927f1900f4e7d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Aug 2012 02:25:24 +0200 Subject: netfilter: nf_ct_expect: fix possible access to uninitialized timer In __nf_ct_expect_check, the function refresh_timer returns 1 if a matching expectation is found and its timer is successfully refreshed. This results in nf_ct_expect_related returning 0. Note that at this point: - the passed expectation is not inserted in the expectation table and its timer was not initialized, since we have refreshed one matching/existing expectation. - nf_ct_expect_alloc uses kmem_cache_alloc, so the expectation timer is in some undefined state just after the allocation, until it is appropriately initialized. This can be a problem for the SIP helper during the expectation addition: ... if (nf_ct_expect_related(rtp_exp) == 0) { if (nf_ct_expect_related(rtcp_exp) != 0) nf_ct_unexpect_related(rtp_exp); ... Note that nf_ct_expect_related(rtp_exp) may return 0 for the timer refresh case that is detailed above. Then, if nf_ct_unexpect_related(rtcp_exp) returns != 0, nf_ct_unexpect_related(rtp_exp) is called, which does: spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } spin_unlock_bh(&nf_conntrack_lock); Note that del_timer always returns false if the timer has been initialized. However, the timer was not initialized since setup_timer was not called, therefore, the expectation timer remains in some undefined state. If I'm not missing anything, this may lead to the removal an unexistent expectation. To fix this, the optimization that allows refreshing an expectation is removed. Now nf_conntrack_expect_related looks more consistent to me since it always add the expectation in case that it returns success. Thanks to Patrick McHardy for participating in the discussion of this patch. I think this may be the source of the problem described by: http://marc.info/?l=netfilter-devel&m=134073514719421&w=2 Reported-by: Rafal Fitt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 45cf602a76bc..527651a53a45 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master, } } -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - const struct nf_conntrack_expect_policy *p; - - if (!del_timer(&i->timeout)) - return 0; - - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[i->class]; - i->timeout.expires = jiffies + p->timeout * HZ; - add_timer(&i->timeout); - return 1; -} - static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; @@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n; + struct hlist_node *n, *next; unsigned int h; int ret = 1; @@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_ct_expect_put(i); + break; } } else if (expect_clash(i, expect)) { ret = -EBUSY; -- cgit v1.2.1 From 6932f119bd71a315a1945276377277d91c4a71c6 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Thu, 16 Aug 2012 01:24:49 +0000 Subject: sctp: fix compile issue with disabled CONFIG_NET_NS struct seq_net_private has no struct net if CONFIG_NET_NS is not enabled Signed-off-by: Ulrich Weber Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 3e62ee55228f..d9cb2ab149fe 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -199,7 +199,6 @@ static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp endpoints (/proc/net/sctp/eps). */ static int sctp_eps_seq_show(struct seq_file *seq, void *v) { - struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; @@ -216,7 +215,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; - if (!net_eq(sock_net(sk), priv->net)) + if (!net_eq(sock_net(sk), seq_file_net(seq))) continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, @@ -307,7 +306,6 @@ static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { - struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -324,7 +322,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; - if (!net_eq(sock_net(sk), priv->net)) + if (!net_eq(sock_net(sk), seq_file_net(seq))) continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " @@ -423,7 +421,6 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { - struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -438,7 +435,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { - if (!net_eq(sock_net(epb->sk), priv->net)) + if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) continue; assoc = sctp_assoc(epb); list_for_each_entry(tsp, &assoc->peer.transport_addr_list, -- cgit v1.2.1 From 02644a17457414f38e29f32d5c640b06d08fa092 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Aug 2012 03:16:19 +0000 Subject: sctp: fix bogus if statement in sctp_auth_recv_cid() There is an extra semi-colon here, so we always return 0 instead of calling __sctp_auth_cid(). Signed-off-by: Dan Carpenter Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index aaa6c121ecce..159b9bc5d633 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -695,7 +695,7 @@ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) return 0; net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable); + if (!net->sctp.auth_enable) return 0; return __sctp_auth_cid(chunk, -- cgit v1.2.1 From 16c0b164bd24d44db137693a36b428ba28970c62 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 15 Aug 2012 20:44:27 +0000 Subject: act_mirred: do not drop packets when fails to mirror it We drop packet unconditionally when we fail to mirror it. This is not intended in some cases. Consdier for kvm guest, we may mirror the traffic of the bridge to a tap device used by a VM. When kernel fails to mirror the packet in conditions such as when qemu crashes or stop polling the tap, it's hard for the management software to detect such condition and clean the the mirroring before. This would lead all packets to the bridge to be dropped and break the netowrk of other virtual machines. To solve the issue, the patch does not drop packets when kernel fails to mirror it, and only drop the redirected packets. Signed-off-by: Jason Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc18e9e0..9c0fd0c78814 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, out: if (err) { m->tcf_qstats.overlimits++; - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - retval = TC_ACT_SHOT; - } else { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + retval = TC_ACT_SHOT; + else + retval = m->tcf_action; + } else retval = m->tcf_action; - } spin_unlock(&m->tcf_lock); return retval; -- cgit v1.2.1 From f796c20cf67aa54c9130d0dc41307c0025719b85 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:24 +0000 Subject: net: netprio: fix files lock and remove useless d_path bits Add lock to prevent a race with a file closing and also remove useless and ugly sscanf code. The extra code was never needed and the case it supposedly protected against is in fact handled correctly by sock_from_file as pointed out by Al Viro. CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ed0c0431fcd8..f65dba3afd99 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -277,12 +277,6 @@ out_free_devname: void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; - char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); - - if (!tmp) { - pr_warn("Unable to attach cgrp due to alloc failure!\n"); - return; - } cgroup_taskset_for_each(p, cgrp, tset) { unsigned int fd; @@ -296,32 +290,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) continue; } - rcu_read_lock(); + spin_lock(&files->file_lock); fdt = files_fdtable(files); for (fd = 0; fd < fdt->max_fds; fd++) { - char *path; struct file *file; struct socket *sock; - unsigned long s; - int rv, err = 0; + int err; file = fcheck_files(files, fd); if (!file) continue; - path = d_path(&file->f_path, tmp, PAGE_SIZE); - rv = sscanf(path, "socket:[%lu]", &s); - if (rv <= 0) - continue; - sock = sock_from_file(file, &err); - if (!err) + if (sock) sock_update_netprioidx(sock->sk, p); } - rcu_read_unlock(); + spin_unlock(&files->file_lock); task_unlock(p); } - kfree(tmp); } static struct cftype ss_files[] = { -- cgit v1.2.1 From 48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:30 +0000 Subject: net: netprio: fd passed in SCM_RIGHTS datagram not set correctly A socket fd passed in a SCM_RIGHTS datagram was not getting updated with the new tasks cgrp prioidx. This leaves IO on the socket tagged with the old tasks priority. To fix this add a check in the scm recvmsg path to update the sock cgrp prioidx with the new tasks value. Thanks to Al Viro for catching this. CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/scm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd68ef4..040cebeed45b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); isk, current); fd_install(new_fd, fp[i]); } -- cgit v1.2.1 From 476ad154f3b41dd7d9a08a2f641e28388abc2fd1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:35 +0000 Subject: net: netprio: fix cgrp create and write priomap race A race exists where creating cgroups and also updating the priomap may result in losing a priomap update. This is because priomap writers are not protected by rtnl_lock. Move priority writer into rtnl_lock()/rtnl_unlock(). CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index f65dba3afd99..c75e3f9d060f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev) u32 max_len; struct netprio_map *map; - rtnl_lock(); max_len = atomic_read(&max_prioidx) + 1; map = rtnl_dereference(dev->priomap); if (!map || map->priomap_len < max_len) ret = extend_netdev_table(dev, max_len); - rtnl_unlock(); return ret; } @@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; + rtnl_lock(); ret = write_update_netdev_table(dev); if (ret < 0) goto out_put_dev; - rcu_read_lock(); - map = rcu_dereference(dev->priomap); + map = rtnl_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; - rcu_read_unlock(); out_put_dev: + rtnl_unlock(); dev_put(dev); out_free_devname: -- cgit v1.2.1 From 898132ae76d1aeb52301f10e8795c34fbb54e853 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Aug 2012 16:15:02 +0300 Subject: ipv6: move dereference after check in fl_free() There is a dereference before checking for NULL bug here. Generally free() functions should accept NULL pointers. For example, fl_create() can pass a NULL pointer to fl_free() on the error path. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. Biederman --- net/ipv6/ip6_flowlabel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c836a6a20a34..90bbefb57943 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -91,12 +91,9 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { - switch (fl->share) { - case IPV6_FL_S_PROCESS: - put_pid(fl->owner.pid); - break; - } if (fl) { + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); } -- cgit v1.2.1 From 56892261ed1a854db5363df8bb3fbdb2c6c28d4c Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 16 Aug 2012 17:51:25 +0800 Subject: xfrm: Use rcu_dereference_bh to deference pointer protected by rcu_read_lock_bh Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5ad4d2c4b83c..6405764eaa82 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2501,11 +2501,11 @@ static void __net_init xfrm_dst_ops_init(struct net *net) struct xfrm_policy_afinfo *afinfo; rcu_read_lock_bh(); - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); + afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET]); if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; #if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); + afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET6]); if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; #endif -- cgit v1.2.1 From 5ef5d6c569f80cf716d75fa88e9b5ee72f0986b2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Aug 2012 03:14:04 +0000 Subject: gre: information leak in ip6_tnl_ioctl() There is a one byte hole between p->hop_limit and p->flowinfo where stack memory is leaked to the user. This was introduced in c12b395a46 "gre: Support GRE over IPv6". Signed-off-by: Dan Carpenter --- net/ipv6/ip6_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 33d2a0e6712d..cb7e2ded6f08 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1312,6 +1312,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); } if (t == NULL) t = netdev_priv(dev); -- cgit v1.2.1 From 16f01365fa01150bf3606fe702a80a03ec87953a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 16 Aug 2012 05:34:22 +0000 Subject: packet: Report rings cfg via diag engine One extension bit may result in two nlattrs -- one per ring type. If some ring type is not configured, then the respective nlatts will be empty. The structure reported contains the data, that is given to the corresponding ring setup socket option. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/diag.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/diag.c b/net/packet/diag.c index 3dda4ecb9a46..e3975e4d458c 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -67,12 +67,54 @@ static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) return 0; } +static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type, + struct sk_buff *nlskb) +{ + struct packet_diag_ring pdr; + + if (!ring->pg_vec || ((ver > TPACKET_V2) && + (nl_type == PACKET_DIAG_TX_RING))) + return 0; + + pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + pdr.pdr_block_nr = ring->pg_vec_len; + pdr.pdr_frame_size = ring->frame_size; + pdr.pdr_frame_nr = ring->frame_max + 1; + + if (ver > TPACKET_V2) { + pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov; + pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv; + pdr.pdr_features = ring->prb_bdqc.feature_req_word; + } else { + pdr.pdr_retire_tmo = 0; + pdr.pdr_sizeof_priv = 0; + pdr.pdr_features = 0; + } + + return nla_put(nlskb, nl_type, sizeof(pdr), &pdr); +} + +static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb) +{ + int ret; + + mutex_lock(&po->pg_vec_lock); + ret = pdiag_put_ring(&po->rx_ring, po->tp_version, + PACKET_DIAG_RX_RING, skb); + if (!ret) + ret = pdiag_put_ring(&po->tx_ring, po->tp_version, + PACKET_DIAG_TX_RING, skb); + mutex_unlock(&po->pg_vec_lock); + + return ret; +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; - const struct packet_sock *po = pkt_sk(sk); + struct packet_sock *po = pkt_sk(sk); nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); if (!nlh) @@ -93,6 +135,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_mclist(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_RING_CFG) && + pdiag_put_rings_cfg(po, skb)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.1 From fff3321d75b1a18231876a1aceb36eacbbf6221e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 16 Aug 2012 05:36:48 +0000 Subject: packet: Report fanout status via diag engine Reported value is the same reported by the FANOUT getsockoption, but unlike it, the absent fanout setup results in absent nlattr, rather than in nlattr with zero value. This is done so, since zero fanout report may mean both -- no fanout, and fanout with both id and type zero. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/packet/af_packet.c | 23 +++-------------------- net/packet/diag.c | 20 ++++++++++++++++++++ net/packet/internal.h | 20 +++++++++++++++++++- 3 files changed, 42 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8a1605ae4029..226b2cdfc339 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -207,24 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -#define PACKET_FANOUT_MAX 256 - -struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif - unsigned int num_members; - u16 id; - u8 type; - u8 defrag; - atomic_t rr_cur; - struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; - spinlock_t lock; - atomic_t sk_ref; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - struct packet_skb_cb { unsigned int origlen; union { @@ -1148,7 +1130,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } -static DEFINE_MUTEX(fanout_mutex); +DEFINE_MUTEX(fanout_mutex); +EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static void __fanout_link(struct sock *sk, struct packet_sock *po) @@ -1260,9 +1243,9 @@ static void fanout_release(struct sock *sk) if (!f) return; + mutex_lock(&fanout_mutex); po->fanout = NULL; - mutex_lock(&fanout_mutex); if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); diff --git a/net/packet/diag.c b/net/packet/diag.c index e3975e4d458c..bc33fbe8a5ef 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -109,6 +109,22 @@ static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb) return ret; } +static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) +{ + int ret = 0; + + mutex_lock(&fanout_mutex); + if (po->fanout) { + u32 val; + + val = (u32)po->fanout->id | ((u32)po->fanout->type << 16); + ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val); + } + mutex_unlock(&fanout_mutex); + + return ret; +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { @@ -139,6 +155,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_rings_cfg(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_FANOUT) && + pdiag_put_fanout(po, skb)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: diff --git a/net/packet/internal.h b/net/packet/internal.h index 2c5fca28b242..44945f6b7252 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -67,7 +67,25 @@ struct packet_ring_buffer { atomic_t pending; }; -struct packet_fanout; +extern struct mutex fanout_mutex; +#define PACKET_FANOUT_MAX 256 + +struct packet_fanout { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + unsigned int num_members; + u16 id; + u8 type; + u8 defrag; + atomic_t rr_cur; + struct list_head list; + struct sock *arr[PACKET_FANOUT_MAX]; + spinlock_t lock; + atomic_t sk_ref; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; -- cgit v1.2.1 From fa7f86f1bb5d8f08d10442a546252d2670b26f41 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:06 +0000 Subject: tipc: optimize the initialization of network device notifier Ethernet media initialization is only done when TIPC is started or switched to network mode. So the initialization of the network device notifier structure can be moved out of this function and done statically instead. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/eth_media.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 90ac9bfa7abb..0f312c261bed 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -58,7 +58,16 @@ struct eth_bearer { static struct tipc_media eth_media_info; static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; -static struct notifier_block notifier; + +static int recv_notification(struct notifier_block *nb, unsigned long evt, + void *dv); +/* + * Network device notifier info + */ +static struct notifier_block notifier = { + .notifier_call = recv_notification, + .priority = 0 +}; /** * eth_media_addr_set - initialize Ethernet media address structure @@ -357,8 +366,6 @@ int tipc_eth_media_start(void) if (res) return res; - notifier.notifier_call = &recv_notification; - notifier.priority = 0; res = register_netdevice_notifier(¬ifier); if (!res) eth_started = 1; -- cgit v1.2.1 From 4225a398c1352a7a5c14dc07277cb5cc4473983b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:07 +0000 Subject: tipc: fix lockdep warning during bearer initialization When the lockdep validator is enabled, it will report the below warning when we enable a TIPC bearer: [ INFO: possible irq lock inversion dependency detected ] --------------------------------------------------------- Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(ptype_lock); local_irq_disable(); lock(tipc_net_lock); lock(ptype_lock); lock(tipc_net_lock); *** DEADLOCK *** the shortest dependencies between 2nd lock and 1st lock: -> (ptype_lock){+.+...} ops: 10 { [...] SOFTIRQ-ON-W at: [] __lock_acquire+0x528/0x13e0 [] lock_acquire+0x90/0x100 [] _raw_spin_lock+0x38/0x50 [] dev_add_pack+0x3a/0x60 [] arp_init+0x1a/0x48 [] inet_init+0x181/0x27e [] do_one_initcall+0x34/0x170 [] kernel_init+0x110/0x1b2 [] kernel_thread_helper+0x6/0x10 [...] ... key at: [] ptype_lock+0x10/0x20 ... acquired at: [] lock_acquire+0x90/0x100 [] _raw_spin_lock+0x38/0x50 [] dev_add_pack+0x3a/0x60 [] enable_bearer+0xf2/0x140 [tipc] [] tipc_enable_bearer+0x1ba/0x450 [tipc] [] tipc_cfg_do_cmd+0x5c4/0x830 [tipc] [] handle_cmd+0x42/0xd0 [tipc] [] genl_rcv_msg+0x232/0x280 [] netlink_rcv_skb+0x86/0xb0 [] genl_rcv+0x1c/0x30 [] netlink_unicast+0x174/0x1f0 [] netlink_sendmsg+0x1eb/0x2d0 [] sock_aio_write+0x161/0x170 [] do_sync_write+0xac/0xf0 [] vfs_write+0x156/0x170 [] sys_write+0x42/0x70 [] sysenter_do_call+0x12/0x38 [...] } -> (tipc_net_lock){+..-..} ops: 4 { [...] IN-SOFTIRQ-R at: [] __lock_acquire+0x64a/0x13e0 [] lock_acquire+0x90/0x100 [] _raw_read_lock_bh+0x3d/0x50 [] tipc_recv_msg+0x1d/0x830 [tipc] [] recv_msg+0x3f/0x50 [tipc] [] __netif_receive_skb+0x22a/0x590 [] netif_receive_skb+0x2b/0xf0 [] pcnet32_poll+0x292/0x780 [] net_rx_action+0xfa/0x1e0 [] __do_softirq+0xae/0x1e0 [...] } >From the log, we can see three different call chains between CPU0 and CPU1: Time 0 on CPU0: kernel_init()->inet_init()->dev_add_pack() At time 0, the ptype_lock is held by CPU0 in dev_add_pack(); Time 1 on CPU1: tipc_enable_bearer()->enable_bearer()->dev_add_pack() At time 1, tipc_enable_bearer() first holds tipc_net_lock, and then wants to take ptype_lock to register TIPC protocol handler into the networking stack. But the ptype_lock has been taken by dev_add_pack() on CPU0, so at this time the dev_add_pack() running on CPU1 has to be busy looping. Time 2 on CPU0: netif_receive_skb()->recv_msg()->tipc_recv_msg() At time 2, an incoming TIPC packet arrives at CPU0, hence tipc_recv_msg() will be invoked. In tipc_recv_msg(), it first wants to hold tipc_net_lock. At the moment, below scenario happens: On CPU0, below is our sequence of taking locks: lock(ptype_lock)->lock(tipc_net_lock) On CPU1, our sequence of taking locks looks like: lock(tipc_net_lock)->lock(ptype_lock) Obviously deadlock may happen in this case. But please note the deadlock possibly doesn't occur at all when the first TIPC bearer is enabled. Before enable_bearer() -- running on CPU1 does not hold ptype_lock, so the TIPC receive handler (i.e. recv_msg()) is not registered successfully via dev_add_pack(), so the tipc_recv_msg() cannot be called by recv_msg() even if a TIPC message comes to CPU0. But when the second TIPC bearer is registered, the deadlock can perhaps really happen. To fix it, we will push the work of registering TIPC protocol handler into workqueue context. After the change, both paths taking ptype_lock are always in process contexts, thus, the deadlock should never occur. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/eth_media.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 0f312c261bed..2132c1ef2951 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -46,12 +46,14 @@ * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Ethernet network device * @tipc_packet_type: used in binding TIPC to Ethernet driver + * @setup: work item used when enabling bearer * @cleanup: work item used when disabling bearer */ struct eth_bearer { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; + struct work_struct setup; struct work_struct cleanup; }; @@ -142,6 +144,17 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, return 0; } +/** + * setup_bearer - setup association between Ethernet bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ + struct eth_bearer *eb_ptr = + container_of(work, struct eth_bearer, setup); + + dev_add_pack(&eb_ptr->tipc_packet_type); +} + /** * enable_bearer - attach TIPC bearer to an Ethernet interface */ @@ -182,7 +195,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) eb_ptr->tipc_packet_type.func = recv_msg; eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - dev_add_pack(&eb_ptr->tipc_packet_type); + INIT_WORK(&eb_ptr->setup, setup_bearer); + schedule_work(&eb_ptr->setup); /* Associate TIPC bearer with Ethernet bearer */ eb_ptr->bearer = tb_ptr; -- cgit v1.2.1 From fc0739385ba10f59105e87a46cc93d9d9a10553c Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:08 +0000 Subject: tipc: remove pointless name sanity check and tipc_alphabet array There is no real reason to check whether all letters in the given media name and network interface name are within the character set defined in tipc_alphabet array. Even if we eliminate the checking, the rest of checking conditions in tipc_enable_bearer() can ensure we do not enable an invalid or illegal bearer. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bearer.c | 6 ++---- net/tipc/core.c | 3 --- net/tipc/core.h | 2 -- net/tipc/link.c | 4 +--- 4 files changed, 3 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 09e71241265d..6b2faa57a394 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -60,7 +60,7 @@ static int media_name_valid(const char *name) len = strlen(name); if ((len + 1) > TIPC_MAX_MEDIA_NAME) return 0; - return strspn(name, tipc_alphabet) == len; + return 1; } /** @@ -206,9 +206,7 @@ static int bearer_name_validate(const char *name, /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || - (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) || - (strspn(media_name, tipc_alphabet) != (media_len - 1)) || - (strspn(if_name, tipc_alphabet) != (if_len - 1))) + (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ diff --git a/net/tipc/core.c b/net/tipc/core.c index 6586eac6a50e..c261a5dca7c5 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -50,9 +50,6 @@ /* global variables used by multiple sub-systems within TIPC */ int tipc_random; -const char tipc_alphabet[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; - /* configurable TIPC parameters */ u32 tipc_own_addr; int tipc_max_ports; diff --git a/net/tipc/core.h b/net/tipc/core.h index fd42e106c185..e4e46cd2d0e6 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -85,8 +85,6 @@ extern int tipc_remote_management; * Other global variables */ extern int tipc_random; -extern const char tipc_alphabet[]; - /* * Routines available to privileged subsystems diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c1e6151875e..a79c755cb417 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -210,9 +210,7 @@ static int link_name_validate(const char *name, (z_local > 255) || (c_local > 4095) || (n_local > 4095) || (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) || (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || - (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME) || - (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) || - (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1))) + (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME)) return 0; /* return link name components, if necessary */ -- cgit v1.2.1 From 381294331ed2858f8e75223310f873d580921366 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:09 +0000 Subject: tipc: manually inline single use media_name_valid routine After eliminating the mechanism which checks whether all letters in media name string are within a given character set, the media_name_valid routine becomes trivial. It is also only used once, so it is unnecessary to keep it as a separate function. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bearer.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6b2faa57a394..4ec5c80e8a7c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -48,21 +48,6 @@ struct tipc_bearer tipc_bearers[MAX_BEARERS]; static void bearer_disable(struct tipc_bearer *b_ptr); -/** - * media_name_valid - validate media name - * - * Returns 1 if media name is valid, otherwise 0. - */ -static int media_name_valid(const char *name) -{ - u32 len; - - len = strlen(name); - if ((len + 1) > TIPC_MAX_MEDIA_NAME) - return 0; - return 1; -} - /** * tipc_media_find - locates specified media object by name */ @@ -102,7 +87,7 @@ int tipc_register_media(struct tipc_media *m_ptr) write_lock_bh(&tipc_net_lock); - if (!media_name_valid(m_ptr->name)) + if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) goto exit; if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || !m_ptr->bcast_addr.broadcast) -- cgit v1.2.1 From 379c0456afc170d56ceb712a5689ede91d293e88 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:10 +0000 Subject: tipc: change tipc_net_start routine return value type Since now tipc_net_start() always returns a success code - 0, its return value type should be changed from integer to void, which can avoid unnecessary check for its return value. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 5 ++--- net/tipc/net.c | 3 +-- net/tipc/net.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index c261a5dca7c5..daae7f74d418 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -98,9 +98,8 @@ int tipc_core_start_net(unsigned long addr) { int res; - res = tipc_net_start(addr); - if (!res) - res = tipc_eth_media_start(); + tipc_net_start(addr); + res = tipc_eth_media_start(); if (res) tipc_core_stop_net(); return res; diff --git a/net/tipc/net.c b/net/tipc/net.c index 5b5cea259caf..7d305ecc09c2 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -171,7 +171,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(u32 addr) +void tipc_net_start(u32 addr) { char addr_string[16]; @@ -187,7 +187,6 @@ int tipc_net_start(u32 addr) pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); - return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index 9eb4b9e220eb..079daadb3f72 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -41,7 +41,7 @@ extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); -int tipc_net_start(u32 addr); +void tipc_net_start(u32 addr); void tipc_net_stop(void); #endif -- cgit v1.2.1 From f046e7d9be1cbb3335694c7f9a31d18e1f998ff5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:11 +0000 Subject: tipc: convert tipc_nametbl_size type from variable to macro There is nothing changing this variable dynamically, so change it to a macro to make that more obvious when reading the code. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/name_table.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 360c478b0b53..4ebdcc96cb04 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -41,7 +41,7 @@ #include "subscr.h" #include "port.h" -static int tipc_nametbl_size = 1024; /* must be a power of 2 */ +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ /** * struct name_info - name sequence publication info @@ -114,7 +114,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) { - return x & (tipc_nametbl_size - 1); + return x & (TIPC_NAMETBL_SIZE - 1); } /** @@ -871,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf, len, depth); lowbound = 0; upbound = ~0; - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, @@ -935,7 +935,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) int tipc_nametbl_init(void) { - table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), + table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head), GFP_ATOMIC); if (!table.types) return -ENOMEM; @@ -953,7 +953,7 @@ void tipc_nametbl_stop(void) /* Verify name table is empty, then release it */ write_lock_bh(&tipc_nametbl_lock); - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; pr_err("nametbl_stop(): orphaned hash chain detected\n"); -- cgit v1.2.1 From 61cdd4d80b29cfdee45920238eea2d1fbb51f922 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:12 +0000 Subject: tipc: add __read_mostly annotations to several global variables Added to the following: - tipc_random - tipc_own_addr - tipc_max_ports - tipc_net_id - tipc_remote_management - handler_enabled The above global variables are read often, but written rarely. Use __read_mostly to prevent them being on the same cacheline as another variable which is written to often, which would cause cacheline bouncing. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 10 +++++----- net/tipc/core.h | 10 +++++----- net/tipc/handler.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index daae7f74d418..b858f2003523 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -48,15 +48,15 @@ /* global variables used by multiple sub-systems within TIPC */ -int tipc_random; +int tipc_random __read_mostly; /* configurable TIPC parameters */ -u32 tipc_own_addr; -int tipc_max_ports; +u32 tipc_own_addr __read_mostly; +int tipc_max_ports __read_mostly; int tipc_max_subscriptions; int tipc_max_publications; -int tipc_net_id; -int tipc_remote_management; +int tipc_net_id __read_mostly; +int tipc_remote_management __read_mostly; /** diff --git a/net/tipc/core.h b/net/tipc/core.h index e4e46cd2d0e6..4c5705ac8a5a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -74,17 +74,17 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); /* * Global configuration variables */ -extern u32 tipc_own_addr; -extern int tipc_max_ports; +extern u32 tipc_own_addr __read_mostly; +extern int tipc_max_ports __read_mostly; extern int tipc_max_subscriptions; extern int tipc_max_publications; -extern int tipc_net_id; -extern int tipc_remote_management; +extern int tipc_net_id __read_mostly; +extern int tipc_remote_management __read_mostly; /* * Other global variables */ -extern int tipc_random; +extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 7a52d3922f3c..111ff8300ae5 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -45,7 +45,7 @@ struct queue_item { static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled; +static int handler_enabled __read_mostly; static void process_signal_queue(unsigned long dummy); -- cgit v1.2.1 From 34f256cc7962a44537a0d33877cd93c89873098e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:13 +0000 Subject: tipc: eliminate configuration for maximum number of name subscriptions Gets rid of the need for users to specify the maximum number of name subscriptions supported by TIPC. TIPC now automatically provides support for the maximum number of name subscriptions to 65535. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 25 +++---------------------- net/tipc/core.c | 2 -- net/tipc/core.h | 4 ++-- net/tipc/subscr.c | 4 ++-- 4 files changed, 7 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index a056a3852f71..7a1275863c8a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2012, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -223,21 +223,6 @@ static struct sk_buff *cfg_set_max_publications(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_subscriptions(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max subscriptions must be 1-65535"); - tipc_max_subscriptions = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_max_ports(void) { u32 value; @@ -360,9 +345,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_PUBL: rep_tlv_buf = cfg_set_max_publications(); break; - case TIPC_CMD_SET_MAX_SUBSCR: - rep_tlv_buf = cfg_set_max_subscriptions(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -375,9 +357,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_PUBL: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications); break; - case TIPC_CMD_GET_MAX_SUBSCR: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -393,6 +372,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_CLUSTERS: case TIPC_CMD_SET_MAX_NODES: case TIPC_CMD_GET_MAX_NODES: + case TIPC_CMD_SET_MAX_SUBSCR: + case TIPC_CMD_GET_MAX_SUBSCR: case TIPC_CMD_SET_LOG_SIZE: case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED diff --git a/net/tipc/core.c b/net/tipc/core.c index b858f2003523..73e5eac20735 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -53,7 +53,6 @@ int tipc_random __read_mostly; /* configurable TIPC parameters */ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; -int tipc_max_subscriptions; int tipc_max_publications; int tipc_net_id __read_mostly; int tipc_remote_management __read_mostly; @@ -157,7 +156,6 @@ static int __init tipc_init(void) tipc_own_addr = 0; tipc_remote_management = 1; tipc_max_publications = 10000; - tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; diff --git a/net/tipc/core.h b/net/tipc/core.h index 4c5705ac8a5a..ef01412b0989 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,7 +60,8 @@ #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 +#define ULTRA_STRING_MAX_LEN 32768 +#define TIPC_MAX_SUBSCRIPTIONS 65535 struct tipc_msg; /* msg.h */ @@ -76,7 +77,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); */ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; -extern int tipc_max_subscriptions; extern int tipc_max_publications; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 5ed5965eb0be..0f7d0d007e22 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { + if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", - tipc_max_subscriptions); + TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); return NULL; } -- cgit v1.2.1 From e6a04b1d3ff9d5af219b2fcaebe0ef04733d597c Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 16 Aug 2012 12:09:14 +0000 Subject: tipc: eliminate configuration for maximum number of name publications Gets rid of the need for users to specify the maximum number of name publications supported by TIPC. TIPC now automatically provides support for the maximum number of name publications to 65535. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 23 ++--------------------- net/tipc/core.c | 2 -- net/tipc/core.h | 2 +- net/tipc/name_table.c | 4 ++-- 4 files changed, 5 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 7a1275863c8a..f67866c765dd 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -208,21 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_publications(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max publications must be 1-65535)"); - tipc_max_publications = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_max_ports(void) { u32 value; @@ -342,9 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_PORTS: rep_tlv_buf = cfg_set_max_ports(); break; - case TIPC_CMD_SET_MAX_PUBL: - rep_tlv_buf = cfg_set_max_publications(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -354,9 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); break; - case TIPC_CMD_GET_MAX_PUBL: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -374,6 +353,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_NODES: case TIPC_CMD_SET_MAX_SUBSCR: case TIPC_CMD_GET_MAX_SUBSCR: + case TIPC_CMD_SET_MAX_PUBL: + case TIPC_CMD_GET_MAX_PUBL: case TIPC_CMD_SET_LOG_SIZE: case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED diff --git a/net/tipc/core.c b/net/tipc/core.c index 73e5eac20735..bfe8af88469a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -53,7 +53,6 @@ int tipc_random __read_mostly; /* configurable TIPC parameters */ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; -int tipc_max_publications; int tipc_net_id __read_mostly; int tipc_remote_management __read_mostly; @@ -155,7 +154,6 @@ static int __init tipc_init(void) tipc_own_addr = 0; tipc_remote_management = 1; - tipc_max_publications = 10000; tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; diff --git a/net/tipc/core.h b/net/tipc/core.h index ef01412b0989..0207db04179a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -62,6 +62,7 @@ #define ULTRA_STRING_MAX_LEN 32768 #define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 struct tipc_msg; /* msg.h */ @@ -77,7 +78,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); */ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; -extern int tipc_max_publications; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 4ebdcc96cb04..98975e80bb51 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, { struct publication *publ; - if (table.local_publ_count >= tipc_max_publications) { + if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", - tipc_max_publications); + TIPC_MAX_PUBLICATIONS); return NULL; } -- cgit v1.2.1 From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 16 Aug 2012 22:02:58 +0000 Subject: af_packet: don't emit packet on orig fanout group If a packet is emitted on one socket in one group of fanout sockets, it is transmitted again. It is thus read again on one of the sockets of the fanout group. This result in a loop for software which generate packets when receiving one. This retransmission is not the intended behavior: a fanout group must behave like a single socket. The packet should not be transmitted on a socket if it originates from a socket belonging to the same fanout group. This patch fixes the issue by changing the transmission check to take fanout group info account. Reported-by: Aleksandr Kotov Signed-off-by: Eric Leblond Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++++++-- net/packet/af_packet.c | 9 +++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a39354ee1432..debd9372472f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1642,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) +{ + if (ptype->af_packet_priv == NULL) + return false; + + if (ptype->id_match) + return ptype->id_match(ptype, skb->sk); + else if ((struct sock *)ptype->af_packet_priv == skb->sk) + return true; + + return false; +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1659,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * they originated from - MvS (miquels@drinkel.ow.org) */ if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { + (!skb_loop_sk(ptype, skb))) { if (pt_prev) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ac890a1a4c0..aee7196aac36 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } -- cgit v1.2.1 From d92c7f8aabae913de16eb855b19cd2002c341896 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 17 Aug 2012 10:33:12 +0000 Subject: caif: Do not dereference NULL in chnl_recv_cb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In net/caif/chnl_net.c::chnl_recv_cb() we call skb_header_pointer() which may return NULL, but we do not check for a NULL pointer before dereferencing it. This patch adds such a NULL check and properly free's allocated memory and return an error (-EINVAL) on failure - much better than crashing.. Signed-off-by: Jesper Juhl Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 69771c04ba8f..e597733affb8 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) /* check the version of IP */ ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) { + kfree_skb(skb); + return -EINVAL; + } switch (*ip_version >> 4) { case 4: -- cgit v1.2.1 From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 20 Aug 2012 02:56:56 -0700 Subject: net: ipv6: fix oops in inet_putpeer() Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not initialized, causing a false positive result from inetpeer_ptr_is_peer(), which in turn causes a NULL pointer dereference in inet_putpeer(). Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at inet_putpeer+0xe/0x16 EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641 ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8 [] xfrm6_dst_destroy+0x42/0xdb [] dst_destroy+0x1d/0xa4 [] xfrm_bundle_flo_delete+0x2b/0x36 [] flow_cache_gc_task+0x85/0x9f [] process_one_work+0x122/0x441 [] ? apic_timer_interrupt+0x31/0x38 [] ? flow_cache_new_hashrnd+0x2b/0x2b [] worker_thread+0x113/0x3cc Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to properly initialize the dst's peer pointer. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 8 ++++++++ net/xfrm/xfrm_policy.c | 2 ++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812107b1..f8c4c08ffb60 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) +{ + struct rt6_info *rt = (struct rt6_info *)xdst; + + rt6_init_peer(rt, net->ipv6.peers); +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..5a2aa17e4d3c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); -- cgit v1.2.1 From 3de7a37b02f607716753dc669c1dca4f71db08fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 14:36:44 +0000 Subject: net/core/dev.c: fix kernel-doc warning Fix kernel-doc warning: Warning(net/core/dev.c:5745): No description found for parameter 'dev' Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index debd9372472f..83988362805e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5744,6 +5744,7 @@ EXPORT_SYMBOL(netdev_refcnt_read); /** * netdev_wait_allrefs - wait until all references are gone. + * @dev: target net_device * * This is called when unregistering network devices. * -- cgit v1.2.1 From fae6ef87faeb8853896920c68ee703d715799d28 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 19 Aug 2012 03:30:38 +0000 Subject: net: tcp: move sk_rx_dst_set call after tcp_create_openreq_child() This commit removes the sk_rx_dst_set calls from tcp_create_openreq_child(), because at that point the icsk_af_ops field of ipv6_mapped TCP sockets has not been set to its proper final value. Instead, to make sure we get the right sk_rx_dst_set variant appropriate for the address family of the new connection, we have tcp_v{4,6}_syn_recv_sock() directly call the appropriate function shortly after the call to tcp_create_openreq_child() returns. This also moves inet6_sk_rx_dst_set() to avoid a forward declaration with the new approach. Signed-off-by: Neal Cardwell Reported-by: Artem Savkov Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 2 -- net/ipv6/tcp_ipv6.c | 25 +++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 767823764016..5bf2040b25b1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1462,6 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c9dcef2de3..6ff7f10dce9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb9ce2b2f377..a3e60cc04a8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } #endif +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1729,18 +1742,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt = (const struct rt6_info *)dst; - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; -} - static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit v1.2.1 From 2dba62c30ec3040ef1b647a8976fd7e6854cc7a7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 19 Aug 2012 10:16:08 +0000 Subject: netfilter: nfnetlink_log: fix NLA_PUT macro removal bug Commit 1db20a52 (nfnetlink_log: Stop using NLA_PUT*().) incorrectly converted a NLA_PUT_BE16 macro to nla_put_be32() in nfnetlink_log: - NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 169ab59ed9d4..592091c1260b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -480,7 +480,7 @@ __build_packet_message(struct nfulnl_instance *inst, } if (indev && skb_mac_header_was_set(skb)) { - if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || + if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, htons(skb->dev->hard_header_len)) || nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, -- cgit v1.2.1 From fe31d1a8605f020eee859375fdd877186e01b804 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 19 Aug 2012 10:16:09 +0000 Subject: netfilter: sparse endian fixes Fix a couple of endian annotation in net/netfilter: net/netfilter/nfnetlink_acct.c:82:30: warning: cast to restricted __be64 net/netfilter/nfnetlink_acct.c:86:30: warning: cast to restricted __be64 net/netfilter/nfnetlink_cthelper.c:77:28: warning: cast to restricted __be16 net/netfilter/xt_NFQUEUE.c:46:16: warning: restricted __be32 degrades to integer net/netfilter/xt_NFQUEUE.c:60:34: warning: restricted __be32 degrades to integer net/netfilter/xt_NFQUEUE.c:68:34: warning: restricted __be32 degrades to integer net/netfilter/xt_osf.c:272:55: warning: cast to restricted __be16 Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 4 ++-- net/netfilter/nfnetlink_cthelper.c | 2 +- net/netfilter/xt_NFQUEUE.c | 8 +++++--- net/netfilter/xt_osf.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b2e7310ca0b8..d7ec92879071 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, - be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES]))); } if (tb[NFACCT_PKTS]) { atomic64_set(&nfacct->pkts, - be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d6836193d479..32a1ba3f3e27 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; - tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); return 0; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7babe7d68716..817f9e9f2b16 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); /* packets in either direction go into same queue */ - if (iph->saddr < iph->daddr) + if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, (__force u32)iph->daddr, iph->protocol, jhash_initval); @@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb) const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; - if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) { + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { a = (__force u32) ip6h->saddr.s6_addr32[3]; b = (__force u32) ip6h->daddr.s6_addr32[3]; } else { @@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb) a = (__force u32) ip6h->daddr.s6_addr32[3]; } - if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1]) + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) c = (__force u32) ip6h->saddr.s6_addr32[1]; else c = (__force u32) ip6h->daddr.s6_addr32[1]; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 846f895cb656..a5e673d32bda 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) mss <<= 8; mss |= optp[2]; - mss = ntohs(mss); + mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: loop_cont = 1; -- cgit v1.2.1 From 2834a6386b86a5dd20380aadb01897c525448aa7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 19 Aug 2012 10:16:10 +0000 Subject: netfilter: nf_conntrack: remove unnecessary RTNL locking Locking the rtnl was added to nf_conntrack_l{3,4}_proto_unregister() for walking the network namespace list. This is not done anymore since we have proper namespace support in the protocols now, so we don't need to take the RTNL anymore. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 0dc63854390f..51e928db48c8 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net, nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l3proto, proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net, nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); -- cgit v1.2.1 From aa7a00809cf6afe3cd6f5af2889110b47b798667 Mon Sep 17 00:00:00 2001 From: Eyal Shapira Date: Mon, 6 Aug 2012 14:26:16 +0300 Subject: mac80211: avoid using synchronize_rcu in ieee80211_set_probe_resp This could take a while (100ms+) and may delay sending assoc resp in AP mode with WPS or P2P GO (as setting the probe resp takes place there). We've encountered situations where the delay was big enough to cause connection problems with devices like Galaxy Nexus. Switch to using call_rcu with a free handler. [Arik - rework to use plain buffer and instead of skb] Signed-off-by: Eyal Shapira Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 16 +++++++--------- net/mac80211/ieee80211_i.h | 8 +++++++- net/mac80211/iface.c | 4 ++-- net/mac80211/tx.c | 7 +++++-- 4 files changed, 21 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index df64b455821d..8052a7ad03a6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -725,25 +725,23 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len) { - struct sk_buff *new, *old; + struct probe_resp *new, *old; if (!resp || !resp_len) - return 1; + return -EINVAL; old = rtnl_dereference(sdata->u.ap.probe_resp); - new = dev_alloc_skb(resp_len); + new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; - memcpy(skb_put(new, resp_len), resp, resp_len); + new->len = resp_len; + memcpy(new->data, resp, resp_len); rcu_assign_pointer(sdata->u.ap.probe_resp, new); - if (old) { - /* TODO: use call_rcu() */ - synchronize_rcu(); - dev_kfree_skb(old); - } + if (old) + kfree_rcu(old, rcu_head); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d1a7c58a8c62..e22aee83ba53 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -272,9 +272,15 @@ struct beacon_data { struct rcu_head rcu_head; }; +struct probe_resp { + struct rcu_head rcu_head; + int len; + u8 data[0]; +}; + struct ieee80211_if_ap { struct beacon_data __rcu *beacon; - struct sk_buff __rcu *probe_resp; + struct probe_resp __rcu *probe_resp; struct list_head vlans; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index abee3a0c25ed..fbab7a84ca21 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -715,7 +715,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data *vlan, *tmpsdata; struct beacon_data *old_beacon = rtnl_dereference(sdata->u.ap.beacon); - struct sk_buff *old_probe_resp = + struct probe_resp *old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); /* sdata_running will return false, so this will disable */ @@ -727,7 +727,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(sdata->u.ap.probe_resp, NULL); synchronize_rcu(); kfree(old_beacon); - kfree_skb(old_probe_resp); + kfree(old_probe_resp); /* down all dependent devices, that is VLANs */ list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7dbcf293708b..2d004ba0615e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2481,7 +2481,8 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ieee80211_if_ap *ap = NULL; - struct sk_buff *presp = NULL, *skb = NULL; + struct sk_buff *skb = NULL; + struct probe_resp *presp = NULL; struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); @@ -2495,10 +2496,12 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw, if (!presp) goto out; - skb = skb_copy(presp, GFP_ATOMIC); + skb = dev_alloc_skb(presp->len); if (!skb) goto out; + memcpy(skb_put(skb, presp->len), presp->data, presp->len); + hdr = (struct ieee80211_hdr *) skb->data; memset(hdr->addr1, 0, sizeof(hdr->addr1)); -- cgit v1.2.1 From b22bd5221cfe80ee3d345d9deccfd29edf9bafb4 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 9 Aug 2012 18:15:39 -0700 Subject: mac80211: use skb_queue_walk() in mesh_path_assign_nexthop Since all we really want is just to iterate over all skbs, do just that and avoid (de)queueing to a clusmy tmpq. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index bec7b281b5ba..b819d6b847d0 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -203,23 +203,17 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_hdr *hdr; - struct sk_buff_head tmpq; unsigned long flags; rcu_assign_pointer(mpath->next_hop, sta); - __skb_queue_head_init(&tmpq); - spin_lock_irqsave(&mpath->frame_queue.lock, flags); - - while ((skb = __skb_dequeue(&mpath->frame_queue)) != NULL) { + skb_queue_walk(&mpath->frame_queue, skb) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); - __skb_queue_tail(&tmpq, skb); } - skb_queue_splice(&tmpq, &mpath->frame_queue); spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } -- cgit v1.2.1 From 4bd4c2dd8e734868ae9f0ceb87a6edd27df8f45c Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 9 Aug 2012 18:15:40 -0700 Subject: mac80211: clean up mpath_move_to_queue() Use skb_queue_walk_safe instead, and fix a few issues: - didn't free old skbs on moving - didn't react to failed skb alloc - needlessly held a local pointer to the destination frame queue - didn't check destination queue length before adding skb Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 3 +++ net/mac80211/mesh_hwmp.c | 2 -- net/mac80211/mesh_pathtbl.c | 34 ++++++++++++++++++---------------- 3 files changed, 21 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index faaa39bcfd10..e4d911ffd5d1 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -215,6 +215,9 @@ struct mesh_rmc { /* Maximum number of paths per interface */ #define MESH_MAX_MPATHS 1024 +/* Number of frames buffered per destination for unresolved destinations */ +#define MESH_FRAME_QUEUE_LEN 10 + /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 494bc39f61a4..47aeee2d8db1 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -17,8 +17,6 @@ #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 -/* Number of frames buffered per destination for unresolved destinations */ -#define MESH_FRAME_QUEUE_LEN 10 #define MAX_PREQ_QUEUE_LEN 64 /* Destination only */ diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b819d6b847d0..aa749818860e 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -279,40 +279,42 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, bool copy) { - struct sk_buff *skb, *cp_skb = NULL; - struct sk_buff_head gateq, failq; + struct sk_buff *skb, *fskb, *tmp; + struct sk_buff_head failq; unsigned long flags; - int num_skbs; BUG_ON(gate_mpath == from_mpath); BUG_ON(!gate_mpath->next_hop); - __skb_queue_head_init(&gateq); __skb_queue_head_init(&failq); spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice_init(&from_mpath->frame_queue, &failq); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); - num_skbs = skb_queue_len(&failq); - - while (num_skbs--) { - skb = __skb_dequeue(&failq); - if (copy) { - cp_skb = skb_copy(skb, GFP_ATOMIC); - if (cp_skb) - __skb_queue_tail(&failq, cp_skb); + skb_queue_walk_safe(&failq, fskb, tmp) { + if (skb_queue_len(&gate_mpath->frame_queue) >= + MESH_FRAME_QUEUE_LEN) { + mpath_dbg(gate_mpath->sdata, "mpath queue full!\n"); + break; } + skb = skb_copy(fskb, GFP_ATOMIC); + if (WARN_ON(!skb)) + break; + prepare_for_gate(skb, gate_mpath->dst, gate_mpath); - __skb_queue_tail(&gateq, skb); + skb_queue_tail(&gate_mpath->frame_queue, skb); + + if (copy) + continue; + + __skb_unlink(fskb, &failq); + kfree_skb(fskb); } - spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); - skb_queue_splice(&gateq, &gate_mpath->frame_queue); mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); - spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); if (!copy) return; -- cgit v1.2.1 From e687f61eedab8895e5669cb82cebe0253631cd8c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 12 Aug 2012 18:24:55 +0200 Subject: mac80211: add supported rates change notification in IBSS In IBSS it is possible that the supported rates set for a station changes over time (e.g. it gets first initialised as an empty set because of no available information about rates and updated later). In this case the driver has to be notified about the change in order to update its internal table accordingly (if needed). This behaviour is needed by all those drivers that handle rc internally but leave stations management to mac80211 Reported-by: Gui Iribarren Signed-off-by: Antonio Quartulli [Johannes - add docs, validate IBSS mode only, fix compilation] Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 3 +++ net/mac80211/ibss.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index a81117a83996..a81154d27291 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -528,6 +528,9 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, sdata = get_bss_sdata(sdata); check_sdata_in_driver(sdata); + WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && + sdata->vif.type != NL80211_IFTYPE_ADHOC); + trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) local->ops->sta_rc_update(&local->hw, &sdata->vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1ebda2f9e57b..a9d93285dba7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -459,8 +459,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } } - if (sta && rates_updated) + if (sta && rates_updated) { + drv_sta_rc_update(local, sdata, &sta->sta, + IEEE80211_RC_SUPP_RATES_CHANGED); rate_control_rate_init(sta); + } rcu_read_unlock(); } -- cgit v1.2.1 From 48613ece3d6a2613caa376f51477435cc080f3cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jul 2012 11:32:16 +0200 Subject: wireless: add radiotap A-MPDU status field Define the A-MPDU status field in radiotap, also update the radiotap parser for it and the MCS field that was apparently missed last time. Signed-off-by: Johannes Berg --- net/wireless/radiotap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index c4ad7958af52..7d604c06c3dc 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -41,6 +41,8 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = { [IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, }, + [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, }, /* * add more here as they are defined in radiotap.h */ -- cgit v1.2.1 From 4c29867790131c281ef96af507d85e3e5f829408 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jul 2012 11:34:31 +0200 Subject: mac80211: support A-MPDU status reporting Support getting A-MPDU status information from the drivers and reporting it to userspace via radiotap in the standard fields. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0cb4edee6af5..78bf6c7e80c8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -60,7 +60,9 @@ static inline int should_drop_frame(struct sk_buff *skb, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | + RX_FLAG_FAILED_PLCP_CRC | + RX_FLAG_AMPDU_IS_ZEROLEN)) return 1; if (unlikely(skb->len < 16 + present_fcs_len)) return 1; @@ -91,6 +93,13 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (status->flag & RX_FLAG_HT) /* HT info */ len += 3; + if (status->flag & RX_FLAG_AMPDU_DETAILS) { + /* padding */ + while (len & 3) + len++; + len += 8; + } + return len; } @@ -215,6 +224,37 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; *pos++ = status->rate_idx; } + + if (status->flag & RX_FLAG_AMPDU_DETAILS) { + u16 flags = 0; + + /* ensure 4 byte alignment */ + while ((pos - (u8 *)rthdr) & 3) + pos++; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS); + put_unaligned_le32(status->ampdu_reference, pos); + pos += 4; + if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN) + flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN; + if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN) + flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN; + if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN) + flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN; + if (status->flag & RX_FLAG_AMPDU_IS_LAST) + flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR) + flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) + flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN; + put_unaligned_le16(flags, pos); + pos += 2; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) + *pos++ = status->ampdu_delimiter_crc; + else + *pos++ = 0; + *pos++ = 0; + } } /* -- cgit v1.2.1 From 90bcf867ceef50155a1a14af4dc248061f6b8b94 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 15:48:38 +0200 Subject: mac80211: remove unneeded 'bssid' variable There's no need to copy the BSSID just to print it, remove the unnecessary variable. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9d60b4993635..b7c05bb3b672 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1696,7 +1696,6 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - u8 bssid[ETH_ALEN]; u8 frame_buf[DEAUTH_DISASSOC_LEN]; mutex_lock(&ifmgd->mtx); @@ -1705,9 +1704,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) return; } - memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - - sdata_info(sdata, "Connection to AP %pM lost\n", bssid); + sdata_info(sdata, "Connection to AP %pM lost\n", + ifmgd->associated->bssid); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, -- cgit v1.2.1 From 57eebdf3c28fe7134597acad529fc40f99b76601 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 15:50:46 +0200 Subject: mac80211: clean up CSA handling code Clean up the CSA handling code by moving some of it out of the if and using a C99 initializer for the struct passed to the driver method. While at it, also add a comment that we should wait for a beacon after switching the channel. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b7c05bb3b672..ae524c9081ee 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -712,6 +712,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->local->oper_channel; + /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); out: @@ -788,36 +789,33 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->local->csa_channel = new_ch; + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + + if (sw_elem->mode) + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (sdata->local->ops->channel_switch) { /* use driver's channel switch callback */ - struct ieee80211_channel_switch ch_switch; - memset(&ch_switch, 0, sizeof(ch_switch)); - ch_switch.timestamp = timestamp; - if (sw_elem->mode) { - ch_switch.block_tx = true; - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - } - ch_switch.channel = new_ch; - ch_switch.count = sw_elem->count; - ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + struct ieee80211_channel_switch ch_switch = { + .timestamp = timestamp, + .block_tx = sw_elem->mode, + .channel = new_ch, + .count = sw_elem->count, + }; + drv_channel_switch(sdata->local, &ch_switch); return; } /* channel switch handled in software */ - if (sw_elem->count <= 1) { + if (sw_elem->count <= 1) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); - } else { - if (sw_elem->mode) - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + else mod_timer(&ifmgd->chswitch_timer, jiffies + msecs_to_jiffies(sw_elem->count * cbss->beacon_interval)); - } } static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.1 From 3049000b97bbfc90aa9ba413eadc4007e5bce2e0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 15:53:45 +0200 Subject: mac80211: fix CSA handling timer The time until the channel switch is in TU, not in milliseconds, so use TU_TO_EXP_TIME() to correctly program the timer. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ae524c9081ee..9e61fe127a33 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -813,9 +813,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - jiffies + - msecs_to_jiffies(sw_elem->count * - cbss->beacon_interval)); + TU_TO_EXP_TIME(sw_elem->count * + cbss->beacon_interval)); } static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.1 From 5bc1420b11903e9f8c470d3b33061b8de0c5c005 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 16:13:02 +0200 Subject: mac80211: check size of channel switch IE when parsing The channel switch IE has a fixed size, so we can discard it in parsing if it's not the right size and use the right struct pointer. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/mlme.c | 10 +++------- net/mac80211/util.c | 7 +++++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e22aee83ba53..793f03e15191 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1136,7 +1136,7 @@ struct ieee802_11_elems { u8 *prep; u8 *perr; struct ieee80211_rann_ie *rann; - u8 *ch_switch_elem; + struct ieee80211_channel_sw_ie *ch_switch_ie; u8 *country_elem; u8 *pwr_constr_elem; u8 *quiet_elem; /* first quite element */ @@ -1162,7 +1162,6 @@ struct ieee802_11_elems { u8 preq_len; u8 prep_len; u8 perr_len; - u8 ch_switch_elem_len; u8 country_elem_len; u8 pwr_constr_elem_len; u8 quiet_elem_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9e61fe127a33..b9cb8dbe34d9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2267,14 +2267,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && - (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, - ETH_ALEN) == 0)) { - struct ieee80211_channel_sw_ie *sw_elem = - (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; - ieee80211_sta_process_chanswitch(sdata, sw_elem, + if (elems->ch_switch_ie && + memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0) + ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie, bss, rx_status->mactime); - } } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 99e4258bdb26..7dff94e43a0c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -768,8 +768,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elem_parse_failed = true; break; case WLAN_EID_CHANNEL_SWITCH: - elems->ch_switch_elem = pos; - elems->ch_switch_elem_len = elen; + if (elen != sizeof(struct ieee80211_channel_sw_ie)) { + elem_parse_failed = true; + break; + } + elems->ch_switch_ie = (void *)pos; break; case WLAN_EID_QUIET: if (!elems->quiet_elem) { -- cgit v1.2.1 From cc74c0c7d6d623d0d3f13ef64895937edb7b3177 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 16:49:34 +0200 Subject: mac80211: make ieee80211_beacon_connection_loss_work static There's no need to declare the function in the header file since it's only used in a single place, so make it static. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/mlme.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 793f03e15191..2a80698b6324 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1430,7 +1430,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); -void ieee80211_beacon_connection_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b9cb8dbe34d9..ddb2db5c5b05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1720,7 +1720,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->mtx); } -void ieee80211_beacon_connection_loss_work(struct work_struct *work) +static void ieee80211_beacon_connection_loss_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, -- cgit v1.2.1 From 98104fdeda63d57631c9f89e90a7b83b58fcee40 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 16 Jun 2012 00:19:54 +0200 Subject: cfg80211: add P2P Device abstraction In order to support using a different MAC address for the P2P Device address we must first have a P2P Device abstraction that can be assigned a MAC address. This abstraction will also be useful to support offloading P2P operations to the device, e.g. periodic listen for discoverability. Currently, the driver is responsible for assigning a MAC address to the P2P Device, but this could be changed by allowing a MAC address to be given to the NEW_INTERFACE command. As it has no associated netdev, a P2P Device can only be identified by its wdev identifier but the previous patches allowed using the wdev identifier in various APIs, e.g. remain-on-channel. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++ net/mac80211/util.c | 2 + net/wireless/chan.c | 7 ++- net/wireless/core.c | 53 ++++++++++++++++++++- net/wireless/mlme.c | 10 ++-- net/wireless/nl80211.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++--- net/wireless/util.c | 18 +++++++- 7 files changed, 201 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fbab7a84ca21..366d9d3e84c4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -449,6 +449,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: /* cannot happen */ WARN_ON(1); break; @@ -1146,6 +1147,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: break; + case NL80211_IFTYPE_P2P_DEVICE: + /* not yet supported */ case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: BUG(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7dff94e43a0c..9a4e4e30ea6c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1390,6 +1390,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_MONITOR: /* ignore virtual */ break; + case NL80211_IFTYPE_P2P_DEVICE: + /* not yet supported */ case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d355f67d0cdd..2f876b9ee344 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -105,7 +105,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, ASSERT_WDEV_LOCK(wdev); - if (!netif_running(wdev->netdev)) + if (wdev->netdev && !netif_running(wdev->netdev)) return; switch (wdev->iftype) { @@ -143,6 +143,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_WDS: /* these interface types don't really have a channel */ return; + case NL80211_IFTYPE_P2P_DEVICE: + if (wdev->wiphy->features & + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL) + *chanmode = CHAN_MODE_EXCLUSIVE; + return; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); diff --git a/net/wireless/core.c b/net/wireless/core.c index 31b40cc4a9c3..91b300443f4b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -230,9 +230,24 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) - if (wdev->netdev) + list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (wdev->netdev) { dev_close(wdev->netdev); + continue; + } + /* otherwise, check iftype */ + switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!wdev->p2p_started) + break; + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + rdev->opencount--; + break; + default: + break; + } + } mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); @@ -407,6 +422,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) if (WARN_ON(wiphy->software_iftypes & types)) return -EINVAL; + /* Only a single P2P_DEVICE can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -734,6 +754,35 @@ static void wdev_cleanup_work(struct work_struct *work) dev_put(wdev->netdev); } +void cfg80211_unregister_wdev(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + ASSERT_RTNL(); + + if (WARN_ON(wdev->netdev)) + return; + + mutex_lock(&rdev->devlist_mtx); + list_del_rcu(&wdev->list); + rdev->devlist_generation++; + + switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!wdev->p2p_started) + break; + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + rdev->opencount--; + break; + default: + WARN_ON_ONCE(1); + break; + } + mutex_unlock(&rdev->devlist_mtx); +} +EXPORT_SYMBOL(cfg80211_unregister_wdev); + static struct device_type wiphy_type = { .name = "wlan", }; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1cdb1d5e6b0f..8fd0242ee169 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -736,7 +736,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct net_device *dev = wdev->netdev; const struct ieee80211_mgmt *mgmt; u16 stype; @@ -796,7 +795,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_AP_VLAN: - if (!ether_addr_equal(mgmt->bssid, dev->dev_addr)) + if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev))) err = -EINVAL; break; case NL80211_IFTYPE_MESH_POINT: @@ -809,6 +808,11 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * cfg80211 doesn't track the stations */ break; + case NL80211_IFTYPE_P2P_DEVICE: + /* + * fall through, P2P device only supports + * public action frames + */ default: err = -EOPNOTSUPP; break; @@ -819,7 +823,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return err; } - if (!ether_addr_equal(mgmt->sa, dev->dev_addr)) + if (!ether_addr_equal(mgmt->sa, wdev_address(wdev))) return -EINVAL; /* Transmit the Action frame as requested by user space */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 97026f3b215a..787aeaa902fe 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1100,6 +1100,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; } + CMD(start_p2p_device, START_P2P_DEVICE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -1748,13 +1749,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev && (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dev->dev_addr))) + nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name))) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -2021,8 +2022,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { + switch (type) { + case NL80211_IFTYPE_MESH_POINT: + if (!info->attrs[NL80211_ATTR_MESH_ID]) + break; wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -2031,6 +2034,26 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->mesh_id_up_len); wdev_unlock(wdev); + break; + case NL80211_IFTYPE_P2P_DEVICE: + /* + * P2P Device doesn't have a netdev, so doesn't go + * through the netdev notifier and must be added here + */ + mutex_init(&wdev->mtx); + INIT_LIST_HEAD(&wdev->event_list); + spin_lock_init(&wdev->event_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); + + mutex_lock(&rdev->devlist_mtx); + wdev->identifier = ++rdev->wdev_id; + list_add_rcu(&wdev->list, &rdev->wdev_list); + rdev->devlist_generation++; + mutex_unlock(&rdev->devlist_mtx); + break; + default: + break; } if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, @@ -6053,6 +6076,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6099,6 +6123,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6195,6 +6220,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6810,6 +6836,68 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + int err; + + if (!rdev->ops->start_p2p_device) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + + if (wdev->p2p_started) + return 0; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_add_interface(rdev, wdev->iftype); + mutex_unlock(&rdev->devlist_mtx); + if (err) + return err; + + err = rdev->ops->start_p2p_device(&rdev->wiphy, wdev); + if (err) + return err; + + wdev->p2p_started = true; + mutex_lock(&rdev->devlist_mtx); + rdev->opencount++; + mutex_unlock(&rdev->devlist_mtx); + + return 0; +} + +static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + + if (!rdev->ops->stop_p2p_device) + return -EOPNOTSUPP; + + if (!wdev->p2p_started) + return 0; + + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + + if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } + + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -6817,7 +6905,7 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) #define NL80211_FLAG_NEED_NETDEV_UP (NL80211_FLAG_NEED_NETDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_NEED_WDEV 0x10 -/* If a netdev is associated, it must be UP */ +/* If a netdev is associated, it must be UP, P2P must be started */ #define NL80211_FLAG_NEED_WDEV_UP (NL80211_FLAG_NEED_WDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) @@ -6878,6 +6966,13 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, } dev_hold(dev); + } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { + if (!wdev->p2p_started) { + mutex_unlock(&cfg80211_mutex); + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } } cfg80211_lock_rdev(rdev); @@ -7439,7 +7534,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, - + { + .cmd = NL80211_CMD_START_P2P_DEVICE, + .doit = nl80211_start_p2p_device, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_P2P_DEVICE, + .doit = nl80211_stop_p2p_device, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/util.c b/net/wireless/util.c index ce393dd8c928..d7b672262b5f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -800,6 +800,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; + /* cannot change into P2P device type */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + if (!rdev->ops->change_virtual_intf || !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; @@ -877,6 +881,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NUM_NL80211_IFTYPES: /* not happening */ break; + case NL80211_IFTYPE_P2P_DEVICE: + WARN_ON(1); + break; } } @@ -1041,8 +1048,15 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { if (wdev_iter == wdev) continue; - if (!netif_running(wdev_iter->netdev)) - continue; + if (wdev_iter->netdev) { + if (!netif_running(wdev_iter->netdev)) + continue; + } else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { + if (!wdev_iter->p2p_started) + continue; + } else { + WARN_ON(1); + } if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) continue; -- cgit v1.2.1 From f142c6b906da451ded2c7a8e17b2a0e6fee3e891 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jun 2012 20:07:15 +0200 Subject: mac80211: support P2P Device abstraction After cfg80211 got a P2P Device abstraction, add support to mac80211. Whether it really is supported or not will depend on whether or not the driver has support for it, but mac80211 needs to change to be able to support drivers that need a P2P Device. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 18 ++++ net/mac80211/driver-ops.h | 2 +- net/mac80211/ieee80211_i.h | 4 + net/mac80211/iface.c | 231 +++++++++++++++++++++++++++++---------------- net/mac80211/main.c | 9 ++ net/mac80211/offchannel.c | 6 ++ net/mac80211/rx.c | 14 ++- net/mac80211/status.c | 22 +++-- net/mac80211/trace.h | 2 +- net/mac80211/util.c | 14 ++- 10 files changed, 226 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8052a7ad03a6..69b322f6ca2e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -102,6 +102,18 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return 0; } +static int ieee80211_start_p2p_device(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + return ieee80211_do_open(wdev, true); +} + +static void ieee80211_stop_p2p_device(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -1774,6 +1786,7 @@ static int ieee80211_scan(struct wiphy *wiphy, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) @@ -2461,6 +2474,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (!sdata->u.mgd.associated) need_offchan = true; break; + case NL80211_IFTYPE_P2P_DEVICE: + need_offchan = true; + break; default: return -EOPNOTSUPP; } @@ -3013,6 +3029,8 @@ struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, + .start_p2p_device = ieee80211_start_p2p_device, + .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index a81154d27291..da9003b20004 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -9,7 +9,7 @@ static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) { WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev->name, sdata->flags); + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); } static inline struct ieee80211_sub_if_data * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a80698b6324..0b81fa807179 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1080,6 +1080,8 @@ struct ieee80211_local { struct idr ack_status_frames; spinlock_t ack_status_lock; + struct ieee80211_sub_if_data __rcu *p2p_sdata; + /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1296,6 +1298,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); +int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up); +void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 366d9d3e84c4..152aeea14c85 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -100,6 +100,10 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) sdata->vif.bss_conf.idle = true; continue; } + + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + /* count everything else */ sdata->vif.bss_conf.idle = false; count++; @@ -121,7 +125,8 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) continue; if (sdata->old_idle == sdata->vif.bss_conf.idle) continue; @@ -204,6 +209,8 @@ static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || type2 == NL80211_IFTYPE_MONITOR || + type1 == NL80211_IFTYPE_P2P_DEVICE || + type2 == NL80211_IFTYPE_P2P_DEVICE || (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || (type1 == NL80211_IFTYPE_WDS && (type2 == NL80211_IFTYPE_WDS || @@ -406,9 +413,10 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) * an error on interface type changes that have been pre-checked, so most * checks should be in ieee80211_check_concurrent_iface. */ -static int ieee80211_do_open(struct net_device *dev, bool coming_up) +int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct net_device *dev = wdev->netdev; struct ieee80211_local *local = sdata->local; struct sta_info *sta; u32 changed = 0; @@ -443,13 +451,13 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_DEVICE: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_P2P_DEVICE: /* cannot happen */ WARN_ON(1); break; @@ -472,7 +480,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) * Copy the hopefully now-present MAC address to * this interface, if it has the special null one. */ - if (is_zero_ether_addr(dev->dev_addr)) { + if (dev && is_zero_ether_addr(dev->dev_addr)) { memcpy(dev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); @@ -537,7 +545,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) local->fif_probe_req++; } - changed |= ieee80211_reset_erp_info(sdata); + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); switch (sdata->vif.type) { @@ -548,6 +557,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) netif_carrier_off(dev); break; case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: break; default: netif_carrier_on(dev); @@ -584,6 +594,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) rate_control_rate_init(sta); netif_carrier_on(dev); + } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + rcu_assign_pointer(local->p2p_sdata, sdata); } /* @@ -609,7 +621,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) ieee80211_recalc_ps(local, -1); - netif_tx_start_all_queues(dev); + if (dev) + netif_tx_start_all_queues(dev); return 0; err_del_interface: @@ -639,7 +652,7 @@ static int ieee80211_open(struct net_device *dev) if (err) return err; - return ieee80211_do_open(dev, true); + return ieee80211_do_open(&sdata->wdev, true); } static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, @@ -660,7 +673,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(sdata->dev); + if (sdata->dev) + netif_tx_stop_all_queues(sdata->dev); ieee80211_roc_purge(sdata); @@ -699,14 +713,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, local->fif_probe_req--; } - netif_addr_lock_bh(sdata->dev); - spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, - sdata->dev->addr_len); - spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(sdata->dev); + if (sdata->dev) { + netif_addr_lock_bh(sdata->dev); + spin_lock_bh(&local->filter_lock); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); + spin_unlock_bh(&local->filter_lock); + netif_addr_unlock_bh(sdata->dev); - ieee80211_configure_filter(local); + ieee80211_configure_filter(local); + } del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -767,6 +783,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); ieee80211_configure_filter(local); break; + case NL80211_IFTYPE_P2P_DEVICE: + /* relies on synchronize_rcu() below */ + rcu_assign_pointer(local->p2p_sdata, NULL); + /* fall through */ default: flush_work(&sdata->work); /* @@ -877,9 +897,8 @@ static void ieee80211_set_multicast_list(struct net_device *dev) * Called when the netdev is removed or, by the code below, before * the interface type changes. */ -static void ieee80211_teardown_sdata(struct net_device *dev) +static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -900,6 +919,11 @@ static void ieee80211_teardown_sdata(struct net_device *dev) WARN_ON(flushed); } +static void ieee80211_uninit(struct net_device *dev) +{ + ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev)); +} + static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb) { @@ -909,7 +933,7 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev, static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, - .ndo_uninit = ieee80211_teardown_sdata, + .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, @@ -940,7 +964,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, - .ndo_uninit = ieee80211_teardown_sdata, + .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, @@ -1099,7 +1123,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; sdata->vif.p2p = false; - sdata->dev->netdev_ops = &ieee80211_dataif_ops; sdata->wdev.iftype = type; sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); @@ -1107,8 +1130,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->noack_map = 0; - /* only monitor differs */ - sdata->dev->type = ARPHRD_ETHER; + /* only monitor/p2p-device differ */ + if (sdata->dev) { + sdata->dev->netdev_ops = &ieee80211_dataif_ops; + sdata->dev->type = ARPHRD_ETHER; + } skb_queue_head_init(&sdata->skb_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); @@ -1146,9 +1172,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: - break; case NL80211_IFTYPE_P2P_DEVICE: - /* not yet supported */ + break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: BUG(); @@ -1215,7 +1240,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, ieee80211_do_stop(sdata, false); - ieee80211_teardown_sdata(sdata->dev); + ieee80211_teardown_sdata(sdata); ret = drv_change_interface(local, sdata, internal_type, p2p); if (ret) @@ -1230,7 +1255,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); - err = ieee80211_do_open(sdata->dev, false); + err = ieee80211_do_open(&sdata->wdev, false); WARN(err, "type change: do_open returned %d", err); return ret; @@ -1257,7 +1282,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return ret; } else { /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); + ieee80211_teardown_sdata(sdata); ieee80211_setup_sdata(sdata, type); } @@ -1273,8 +1298,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, } static void ieee80211_assign_perm_addr(struct ieee80211_local *local, - struct net_device *dev, - enum nl80211_iftype type) + u8 *perm_addr, enum nl80211_iftype type) { struct ieee80211_sub_if_data *sdata; u64 mask, start, addr, val, inc; @@ -1283,7 +1307,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, int i; /* default ... something at least */ - memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); if (is_zero_ether_addr(local->hw.wiphy->addr_mask) && local->hw.wiphy->n_addresses <= 1) @@ -1302,7 +1326,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP) continue; - memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN); + memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); break; } /* keep default if no AP interface present */ @@ -1321,7 +1345,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } if (!used) { - memcpy(dev->perm_addr, + memcpy(perm_addr, local->hw.wiphy->addresses[i].addr, ETH_ALEN); break; @@ -1372,7 +1396,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } if (!used) { - memcpy(dev->perm_addr, tmp_addr, ETH_ALEN); + memcpy(perm_addr, tmp_addr, ETH_ALEN); break; } addr = (start & ~mask) | (val & mask); @@ -1388,49 +1412,68 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { - struct net_device *ndev; + struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; int ret, i; int txqs = 1; ASSERT_RTNL(); - if (local->hw.queues >= IEEE80211_NUM_ACS) - txqs = IEEE80211_NUM_ACS; - - ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup, txqs, 1); - if (!ndev) - return -ENOMEM; - dev_net_set(ndev, wiphy_net(local->hw.wiphy)); - - ndev->needed_headroom = local->tx_headroom + - 4*6 /* four MAC addresses */ - + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ - + 6 /* mesh */ - + 8 /* rfc1042/bridge tunnel */ - - ETH_HLEN /* ethernet hard_header_len */ - + IEEE80211_ENCRYPT_HEADROOM; - ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; - - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; - - ieee80211_assign_perm_addr(local, ndev, type); - memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - - /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ - sdata = netdev_priv(ndev); - ndev->ieee80211_ptr = &sdata->wdev; - memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); - memcpy(sdata->name, ndev->name, IFNAMSIZ); + if (type == NL80211_IFTYPE_P2P_DEVICE) { + struct wireless_dev *wdev; + + sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, + GFP_KERNEL); + if (!sdata) + return -ENOMEM; + wdev = &sdata->wdev; + + sdata->dev = NULL; + strlcpy(sdata->name, name, IFNAMSIZ); + ieee80211_assign_perm_addr(local, wdev->address, type); + memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); + } else { + if (local->hw.queues >= IEEE80211_NUM_ACS) + txqs = IEEE80211_NUM_ACS; + + ndev = alloc_netdev_mqs(sizeof(*sdata) + + local->hw.vif_data_size, + name, ieee80211_if_setup, txqs, 1); + if (!ndev) + return -ENOMEM; + dev_net_set(ndev, wiphy_net(local->hw.wiphy)); + + ndev->needed_headroom = local->tx_headroom + + 4*6 /* four MAC addresses */ + + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + + 6 /* mesh */ + + 8 /* rfc1042/bridge tunnel */ + - ETH_HLEN /* ethernet hard_header_len */ + + IEEE80211_ENCRYPT_HEADROOM; + ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + + ret = dev_alloc_name(ndev, ndev->name); + if (ret < 0) { + free_netdev(ndev); + return ret; + } + + ieee80211_assign_perm_addr(local, ndev->perm_addr, type); + memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); + SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); + + /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */ + sdata = netdev_priv(ndev); + ndev->ieee80211_ptr = &sdata->wdev; + memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); + memcpy(sdata->name, ndev->name, IFNAMSIZ); + + sdata->dev = ndev; + } /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; - sdata->dev = ndev; #ifdef CONFIG_INET sdata->arp_filter_state = true; #endif @@ -1459,17 +1502,21 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); - if (params) { - ndev->ieee80211_ptr->use_4addr = params->use_4addr; - if (type == NL80211_IFTYPE_STATION) - sdata->u.mgd.use_4addr = params->use_4addr; - } + if (ndev) { + if (params) { + ndev->ieee80211_ptr->use_4addr = params->use_4addr; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = params->use_4addr; + } - ndev->features |= local->hw.netdev_features; + ndev->features |= local->hw.netdev_features; - ret = register_netdevice(ndev); - if (ret) - goto fail; + ret = register_netdevice(ndev); + if (ret) { + free_netdev(ndev); + return ret; + } + } mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); @@ -1479,10 +1526,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, *new_wdev = &sdata->wdev; return 0; - - fail: - free_netdev(ndev); - return ret; } void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) @@ -1494,7 +1537,21 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->iflist_mtx); synchronize_rcu(); - unregister_netdevice(sdata->dev); + + if (sdata->dev) { + unregister_netdevice(sdata->dev); + } else { + cfg80211_unregister_wdev(&sdata->wdev); + kfree(sdata); + } +} + +void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) +{ + if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) + return; + ieee80211_do_stop(sdata, true); + ieee80211_teardown_sdata(sdata); } /* @@ -1505,6 +1562,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; LIST_HEAD(unreg_list); + LIST_HEAD(wdev_list); ASSERT_RTNL(); @@ -1512,11 +1570,20 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); - unregister_netdevice_queue(sdata->dev, &unreg_list); + if (sdata->dev) + unregister_netdevice_queue(sdata->dev, &unreg_list); + else + list_add(&sdata->list, &wdev_list); } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); list_del(&unreg_list); + + list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { + list_del(&sdata->list); + cfg80211_unregister_wdev(&sdata->wdev); + kfree(sdata); + } } static int netdev_notify(struct notifier_block *nb, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c26e231c733a..e706f9e5b051 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -207,6 +207,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; else if (ieee80211_vif_is_mesh(&sdata->vif)) { sdata->vif.bss_conf.bssid = zero; + } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.bss_conf.bssid = sdata->vif.addr; + WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), + "P2P Device BSS changed %#x", changed); } else { WARN_ON(1); return; @@ -514,6 +518,11 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4), }, + [NL80211_IFTYPE_P2P_DEVICE] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, }; static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 635c3250c668..507121dad082 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -116,6 +116,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); @@ -144,6 +147,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 78bf6c7e80c8..f5258ebc15be 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2812,8 +2812,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!bssid) { if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) return 0; - } else if (!ieee80211_bssid_match(bssid, - sdata->vif.addr)) { + } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { /* * Accept public action frames even when the * BSSID doesn't match, this is used for P2P @@ -2833,9 +2832,18 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; + case NL80211_IFTYPE_P2P_DEVICE: + if (!ieee80211_is_public_action(hdr, skb->len) && + !ieee80211_is_probe_req(hdr->frame_control) && + !ieee80211_is_probe_resp(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) + return 0; + if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; + break; default: /* should never get here */ - WARN_ON(1); + WARN_ON_ONCE(1); break; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8cd72914cdaf..b0801b7d572d 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -519,19 +519,27 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) u64 cookie = (unsigned long)skb; acked = info->flags & IEEE80211_TX_STAT_ACK; - /* - * TODO: When we have non-netdev frame TX, - * we cannot use skb->dev->ieee80211_ptr - */ - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) + ieee80211_is_qos_nullfunc(hdr->frame_control)) { cfg80211_probe_status(skb->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); - else + } else if (skb->dev) { cfg80211_mgmt_tx_status( skb->dev->ieee80211_ptr, cookie, skb->data, skb->len, acked, GFP_ATOMIC); + } else { + struct ieee80211_sub_if_data *p2p_sdata; + + rcu_read_lock(); + + p2p_sdata = rcu_dereference(local->p2p_sdata); + if (p2p_sdata) { + cfg80211_mgmt_tx_status( + &p2p_sdata->wdev, cookie, skb->data, + skb->len, acked, GFP_ATOMIC); + } + rcu_read_unlock(); + } } if (unlikely(info->ack_frame_id)) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c6d33b55b2df..65e9a2a1a3e1 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -24,7 +24,7 @@ __string(vif_name, sdata->dev ? sdata->dev->name : "") #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : "") + __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 9a4e4e30ea6c..79bce870ad78 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -276,6 +276,9 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) list_for_each_entry_rcu(sdata, &local->interfaces, list) { int ac; + if (!sdata->dev) + continue; + if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) continue; @@ -364,6 +367,9 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, list_for_each_entry_rcu(sdata, &local->interfaces, list) { int ac; + if (!sdata->dev) + continue; + for (ac = 0; ac < n_acs; ac++) { if (sdata->vif.hw_queue[ac] == queue || sdata->vif.cab_queue == queue) @@ -902,7 +908,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, drv_conf_tx(local, sdata, ac, &qparam); } - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, @@ -1391,7 +1398,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* ignore virtual */ break; case NL80211_IFTYPE_P2P_DEVICE: - /* not yet supported */ + changed = BSS_CHANGED_IDLE; + break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -1578,6 +1586,8 @@ void ieee80211_recalc_smps(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; if (sdata->vif.type != NL80211_IFTYPE_STATION) goto set; -- cgit v1.2.1 From 6d71117a279aa30574a8af6c7348570c292285c2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Jun 2012 17:19:44 +0200 Subject: mac80211: add IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF Some devices like the current iwlwifi implementation require that the P2P interface address match the P2P Device address (only one P2P interface is supported.) Add the HW flag IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF that allows drivers to request that P2P Interfaces added while a P2P Device is active get the same MAC address by default. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 152aeea14c85..59f8adc2aa5f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1313,7 +1313,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, local->hw.wiphy->n_addresses <= 1) return; - mutex_lock(&local->iflist_mtx); switch (type) { @@ -1331,6 +1330,19 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } /* keep default if no AP interface present */ break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) { + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); + goto out_unlock; + } + } + /* otherwise fall through */ default: /* assign a new address if possible -- try n_addresses first */ for (i = 0; i < local->hw.wiphy->n_addresses; i++) { @@ -1405,6 +1417,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } + out_unlock: mutex_unlock(&local->iflist_mtx); } -- cgit v1.2.1 From d348f69f59af769c405c2f43a2d326d7123ef75a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Aug 2012 14:51:44 +0200 Subject: mac80211: simplify buffers in aes_128_cmac_vector There's no need to use a single scratch buffer and calculate offsets into it, just use two separate buffers for the separate variables. Signed-off-by: Johannes Berg --- net/mac80211/aes_cmac.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 8dfd70d8fcfb..a04752e91023 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -38,14 +38,10 @@ static void gf_mulx(u8 *pad) static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, const u8 *addr[], const size_t *len, u8 *mac) { - u8 scratch[2 * AES_BLOCK_SIZE]; - u8 *cbc, *pad; + u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; size_t i, e, left, total_len; - cbc = scratch; - pad = scratch + AES_BLOCK_SIZE; - memset(cbc, 0, AES_BLOCK_SIZE); total_len = 0; -- cgit v1.2.1 From 466f310d100ff54f346c1be481af9935c42467b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Jul 2012 13:51:49 +0200 Subject: mac80211: mesh: don't use global channel type Using local->_oper_channel_type in the mesh code is completely wrong as this value is the combination of the various interface channel types and can be a different value from the mesh interface in case there are multiple virtual interfaces. Use sdata->vif.bss_conf.channel_type instead as it tracks the per-vif channel type. Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 11 ++++++----- net/mac80211/mesh_plink.c | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 035cd0c8ce33..f2d0d213bcfb 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -109,11 +109,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, /* Disallow HT40+/- mismatch */ if (ie->ht_operation && - (local->_oper_channel_type == NL80211_CHAN_HT40MINUS || - local->_oper_channel_type == NL80211_CHAN_HT40PLUS) && + (sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40MINUS || + sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40PLUS) && (sta_channel_type == NL80211_CHAN_HT40MINUS || sta_channel_type == NL80211_CHAN_HT40PLUS) && - local->_oper_channel_type != sta_channel_type) + sdata->vif.bss_conf.channel_type != sta_channel_type) goto mismatch; return true; @@ -375,7 +375,7 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb, sband = local->hw.wiphy->bands[local->oper_channel->band]; if (!sband->ht_cap.ht_supported || - local->_oper_channel_type == NL80211_CHAN_NO_HT) + sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) @@ -392,7 +392,8 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb, { struct ieee80211_local *local = sdata->local; struct ieee80211_channel *channel = local->oper_channel; - enum nl80211_channel_type channel_type = local->_oper_channel_type; + enum nl80211_channel_type channel_type = + sdata->vif.bss_conf.channel_type; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[channel->band]; struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5fd1250f7866..54ce1af491eb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -117,7 +117,7 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - if (local->_oper_channel_type == NL80211_CHAN_NO_HT) + if (sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) return 0; rcu_read_lock(); @@ -147,7 +147,8 @@ out: if (non_ht_sta) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED; - else if (ht20_sta && local->_oper_channel_type > NL80211_CHAN_HT20) + else if (ht20_sta && + sdata->vif.bss_conf.channel_type > NL80211_CHAN_HT20) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ; else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; @@ -379,7 +380,7 @@ static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = rates; if (elems->ht_cap_elem && - sdata->local->_oper_channel_type != NL80211_CHAN_NO_HT) + sdata->vif.bss_conf.channel_type != NL80211_CHAN_NO_HT) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, &sta->sta.ht_cap); -- cgit v1.2.1 From e31583cdf0e2eb71c44b1288a2d93405f972da68 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:07:46 +0200 Subject: mac80211: remove almost unused local variable In ieee80211_beacon_get_tim() we can use the txrc.sband instead of a separate local variable. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2d004ba0615e..2d2f45ecca4d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2300,12 +2300,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; struct beacon_data *beacon; - struct ieee80211_supported_band *sband; enum ieee80211_band band = local->oper_channel->band; struct ieee80211_tx_rate_control txrc; - sband = local->hw.wiphy->bands[band]; - rcu_read_lock(); sdata = vif_to_sdata(vif); @@ -2452,12 +2449,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memset(&txrc, 0, sizeof(txrc)); txrc.hw = hw; - txrc.sband = sband; + txrc.sband = local->hw.wiphy->bands[band]; txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; - if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; -- cgit v1.2.1 From 9e99a127b5724d7a2cd9d2973b10981d56e1d647 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:22:06 +0200 Subject: mac80211: remove freq/chantype from debugfs You can now get these values through iw, and they conflict with multi-channel work. Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b8dfb440c8ef..97173f8144d4 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -63,8 +63,6 @@ DEBUGFS_READONLY_FILE(user_power, "%d", local->user_power_level); DEBUGFS_READONLY_FILE(power, "%d", local->hw.conf.power_level); -DEBUGFS_READONLY_FILE(frequency, "%d", - local->hw.conf.channel->center_freq); DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, "%#08x", @@ -91,33 +89,6 @@ static const struct file_operations reset_ops = { .llseek = noop_llseek, }; -static ssize_t channel_type_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - const char *buf; - - switch (local->hw.conf.channel_type) { - case NL80211_CHAN_NO_HT: - buf = "no ht\n"; - break; - case NL80211_CHAN_HT20: - buf = "ht20\n"; - break; - case NL80211_CHAN_HT40MINUS: - buf = "ht40-\n"; - break; - case NL80211_CHAN_HT40PLUS: - buf = "ht40+\n"; - break; - default: - buf = "???"; - break; - } - - return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); -} - static ssize_t hwflags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -205,7 +176,6 @@ static ssize_t queues_read(struct file *file, char __user *user_buf, } DEBUGFS_READONLY_FILE_OPS(hwflags); -DEBUGFS_READONLY_FILE_OPS(channel_type); DEBUGFS_READONLY_FILE_OPS(queues); /* statistics stuff */ @@ -272,12 +242,10 @@ void debugfs_hw_add(struct ieee80211_local *local) local->debugfs.keys = debugfs_create_dir("keys", phyd); - DEBUGFS_ADD(frequency); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); - DEBUGFS_ADD(channel_type); DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); -- cgit v1.2.1 From f9e6e95b63758202fe2ce43bd7c922db2ff50f80 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jul 2012 14:29:21 +0200 Subject: mac80211: use oper_channel in rate init Using hw.conf.channel is wrong as it could be the temporary channel if the station is added from the workqueue while the device is already on another channel. Use oper_channel instead. Signed-off-by: Johannes Berg --- net/mac80211/rate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 6e4fd32c6617..10de668eb9f6 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -56,7 +56,7 @@ static inline void rate_control_rate_init(struct sta_info *sta) if (!ref) return; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; ref->ops->rate_init(ref->priv, sband, ista, priv_sta); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); -- cgit v1.2.1 From 3d01be72e6fe372a0602221090707a1f04c44646 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:27:39 +0200 Subject: mac80211: don't assume channel is set in tracing With the move to multi-channel and away from drv_config(), hw.conf.channel will not always be set, only for devices using the current API instead of the new channel context APIs. Check the channel is set before adding its frequency to the trace data. Also break some overly long lines in the code. Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 65e9a2a1a3e1..18d9c8a52e9e 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -274,9 +274,12 @@ TRACE_EVENT(drv_config, __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; __entry->max_sleep_period = local->hw.conf.max_sleep_period; __entry->listen_interval = local->hw.conf.listen_interval; - __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; - __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; - __entry->center_freq = local->hw.conf.channel->center_freq; + __entry->long_frame_max_tx_count = + local->hw.conf.long_frame_max_tx_count; + __entry->short_frame_max_tx_count = + local->hw.conf.short_frame_max_tx_count; + __entry->center_freq = local->hw.conf.channel ? + local->hw.conf.channel->center_freq : 0; __entry->channel_type = local->hw.conf.channel_type; __entry->smps = local->hw.conf.smps_mode; ), -- cgit v1.2.1 From 4797c7ba93e4049cdda18045c01bbe563aafff69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:31:19 +0200 Subject: mac80211: use RX status band instead of current band Even for single-channel devices it is possible that we switch the channel temporarily (e.g. for scanning) but while doing so process a received frame that was still received on the old channel, so checking the current band is racy. Use the band from status instead. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f5258ebc15be..b382605c5733 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2308,7 +2308,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: - if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) + if (status->band != IEEE80211_BAND_5GHZ) break; if (sdata->vif.type != NL80211_IFTYPE_STATION) -- cgit v1.2.1 From 9b8648704358e42f9865cd6de7cedb5ac0db67e7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:38:32 +0200 Subject: mac80211: check operating channel in scan The optimisation of scanning only on the current channel should check the operating channel. Also modify it to compare channel pointer rather than the frequency. Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bcaee5d12839..4b75ddeef6b1 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -479,11 +479,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0]->center_freq == - local->hw.conf.channel->center_freq)) { - - /* If we are scanning only on the current channel, then - * we do not need to stop normal activities + (req->channels[0] == local->oper_channel)) { + /* + * If we are scanning only on the operating channel + * then we do not need to stop normal activities */ unsigned long next_delay; -- cgit v1.2.1 From c0af07340aae5db9f976bfe71e2e9bcab3169409 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jul 2012 14:42:10 +0200 Subject: mac80211: convert ops checks to WARN_ON There's no need to BUG_ON when a driver registers invalid operations, warn and return an error. Signed-off-by: Johannes Berg --- net/mac80211/main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e706f9e5b051..bd7529363193 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -545,6 +545,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, int priv_size, i; struct wiphy *wiphy; + if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config || + !ops->add_interface || !ops->remove_interface || + !ops->configure_filter)) + return NULL; + if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove))) return NULL; @@ -597,13 +602,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); - BUG_ON(!ops->tx); - BUG_ON(!ops->start); - BUG_ON(!ops->stop); - BUG_ON(!ops->config); - BUG_ON(!ops->add_interface); - BUG_ON(!ops->remove_interface); - BUG_ON(!ops->configure_filter); local->ops = ops; /* set up some defaults */ -- cgit v1.2.1 From fe94fe05e9fb7c1bea482d1b0fd09029a711cce2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Jul 2012 12:26:34 +0200 Subject: mac80211: pass channel to ieee80211_send_probe_req In multi-channel scenarios, the channel that we will transmit a probe request on isn't always the current channel (which will be NULL anyway) but will instead be the channel that the AP is on. Pass the channel to the ieee80211_send_probe_req() function so it can be used in the different scenarios. The scan code continues to pass the current channel, of course. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 6 ++++-- net/mac80211/scan.c | 3 ++- net/mac80211/util.c | 6 +++--- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0b81fa807179..204bfedba306 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1472,7 +1472,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck); + u32 ratemask, bool directed, bool no_cck, + struct ieee80211_channel *channel); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ddb2db5c5b05..b65b2149b23b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1577,7 +1577,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ssid_len = ssid[1]; ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, false); + 0, (u32) -1, true, false, + ifmgd->associated->channel); } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); @@ -2704,7 +2705,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, false); + NULL, 0, (u32) -1, true, false, + auth_data->bss->channel); } auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4b75ddeef6b1..ef1d69306315 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -416,7 +416,8 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - local->scan_req->no_cck); + local->scan_req->no_cck, + local->hw.conf.channel); /* * After sending probe requests, wait for probe responses diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 79bce870ad78..471fb0516c99 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1163,12 +1163,12 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck) + u32 ratemask, bool directed, bool no_cck, + struct ieee80211_channel *channel) { struct sk_buff *skb; - skb = ieee80211_build_probe_req(sdata, dst, ratemask, - sdata->local->hw.conf.channel, + skb = ieee80211_build_probe_req(sdata, dst, ratemask, channel, ssid, ssid_len, ie, ie_len, directed); if (skb) { -- cgit v1.2.1 From dcf33963c48e1959c83fda84e336dbb000eefa3f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Jul 2012 15:11:56 +0200 Subject: mac80211: clean up ieee80211_subif_start_xmit There's no need to carry around a return value that is always NETDEV_TX_OK anyway. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2d2f45ecca4d..3b807bcb8fc9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1719,7 +1719,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info; - int ret = NETDEV_TX_BUSY, head_need; + int head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; struct ieee80211_hdr hdr; @@ -1735,10 +1735,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, u32 info_flags = 0; u16 info_id = 0; - if (unlikely(skb->len < ETH_HLEN)) { - ret = NETDEV_TX_OK; + if (unlikely(skb->len < ETH_HLEN)) goto fail; - } /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ @@ -1786,7 +1784,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { /* Do not send frames with mesh_ttl == 0 */ sdata->u.mesh.mshstats.dropped_frames_ttl++; - ret = NETDEV_TX_OK; goto fail; } rcu_read_lock(); @@ -1879,10 +1876,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (tdls_direct) { /* link during setup - throw out frames to peer */ - if (!tdls_auth) { - ret = NETDEV_TX_OK; + if (!tdls_auth) goto fail; - } /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1916,7 +1911,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 24; break; default: - ret = NETDEV_TX_OK; goto fail; } @@ -1961,7 +1955,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, I802_DEBUG_INC(local->tx_handlers_drop_unauth_port); - ret = NETDEV_TX_OK; goto fail; } @@ -2016,10 +2009,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb = skb_clone(skb, GFP_ATOMIC); kfree_skb(tmp_skb); - if (!skb) { - ret = NETDEV_TX_OK; + if (!skb) goto fail; - } } hdr.frame_control = fc; @@ -2122,10 +2113,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; fail: - if (ret == NETDEV_TX_OK) - dev_kfree_skb(skb); - - return ret; + dev_kfree_skb(skb); + return NETDEV_TX_OK; } -- cgit v1.2.1 From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 19 Aug 2012 12:29:16 -0700 Subject: libceph: delay debugfs initialization until we learn global_id The debugfs directory includes the cluster fsid and our unique global_id. We need to delay the initialization of the debug entry until we have learned both the fsid and our global_id from the monitor or else the second client can't create its debugfs entry and will fail (and multiple client instances aren't properly reflected in debugfs). Reported by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Yehuda Sadeh --- net/ceph/ceph_common.c | 1 - net/ceph/debugfs.c | 4 ++++ net/ceph/mon_client.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 50 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69e38db28e5f..a8020293f342 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) return -1; } } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a01121..38b5dc1823d4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, client->monc.auth->global_id); + dout("ceph_debugfs_client_init %p %s\n", client, name); + + BUG_ON(client->debugfs_dir); client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); if (!client->debugfs_dir) goto out; @@ -234,6 +237,7 @@ out: void ceph_debugfs_client_cleanup(struct ceph_client *client) { + dout("ceph_debugfs_client_cleanup %p\n", client); debugfs_remove(client->debugfs_osdmap); debugfs_remove(client->debugfs_monmap); debugfs_remove(client->osdc.debugfs_file); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 105d533b55f3..900ea0f043fc 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -310,6 +310,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_open_session); +/* + * We require the fsid and global_id in order to initialize our + * debugfs dir. + */ +static bool have_debugfs_info(struct ceph_mon_client *monc) +{ + dout("have_debugfs_info fsid %d globalid %lld\n", + (int)monc->client->have_fsid, monc->auth->global_id); + return monc->client->have_fsid && monc->auth->global_id > 0; +} + /* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. @@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_client *client = monc->client; struct ceph_monmap *monmap = NULL, *old = monc->monmap; void *p, *end; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); + dout("handle_monmap\n"); p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, if (!client->have_fsid) { client->have_fsid = true; + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); - /* - * do debugfs initialization without mutex to avoid - * creating a locking dependency - */ - ceph_debugfs_client_init(client); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } + goto out_unlocked; } out: @@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, { int ret; int was_auth = 0; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); if (monc->auth->ops) was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; @@ -889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc, __send_subscribe(monc); __resend_generic_request(monc); } + + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } } static int __validate_auth(struct ceph_mon_client *monc) -- cgit v1.2.1 From 43829731dd372d04d6706c51052b9dabab9ca356 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:24 -0700 Subject: workqueue: deprecate flush[_delayed]_work_sync() flush[_delayed]_work_sync() are now spurious. Mark them deprecated and convert all users to flush[_delayed]_work(). If you're cc'd and wondering what's going on: Now all workqueues are non-reentrant and the regular flushes guarantee that the work item is not pending or running on any CPU on return, so there's no reason to use the sync flushes at all and they're going away. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Cc: Russell King Cc: Paul Mundt Cc: Ian Campbell Cc: Jens Axboe Cc: Mattia Dongili Cc: Kent Yoder Cc: David Airlie Cc: Jiri Kosina Cc: Karsten Keil Cc: Bryan Wu Cc: Benjamin Herrenschmidt Cc: Alasdair Kergon Cc: Mauro Carvalho Chehab Cc: Florian Tobias Schandinat Cc: David Woodhouse Cc: "David S. Miller" Cc: linux-wireless@vger.kernel.org Cc: Anton Vorontsov Cc: Sangbeom Kim Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: Eric Van Hensbergen Cc: Takashi Iwai Cc: Steven Whitehouse Cc: Petr Vandrovec Cc: Mark Fasheh Cc: Christoph Hellwig Cc: Avi Kivity --- net/9p/trans_fd.c | 2 +- net/dsa/dsa.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6449bae15702..505f0ce3f10b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1083,7 +1083,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - flush_work_sync(&p9_poll_work); + flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 88e7c2f3fa0d..45295ca09571 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -370,7 +370,7 @@ static int dsa_remove(struct platform_device *pdev) if (dst->link_poll_needed) del_timer_sync(&dst->link_poll_timer); - flush_work_sync(&dst->link_poll_work); + flush_work(&dst->link_poll_work); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; -- cgit v1.2.1 From 31f470738bf9fefc9399a45710c74322121119ac Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 7 Aug 2012 18:05:06 +0300 Subject: Bluetooth: trivial: Use preferred method for NULL check Use standard bluetooth way to check NULL pointer !var instead of var == NULL. Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/af_bluetooth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1b88fe41f4b1..9d49ee6d7219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -622,7 +622,7 @@ static int bt_seq_open(struct inode *inode, struct file *file) sk_list = PDE(inode)->data; s = __seq_open_private(file, &bt_seq_ops, sizeof(struct bt_seq_state)); - if (s == NULL) + if (!s) return -ENOMEM; s->l = sk_list; @@ -644,7 +644,7 @@ int bt_procfs_init(struct module* module, struct net *net, const char *name, sk_list->fops.release = seq_release_private; pde = proc_net_fops_create(net, name, 0, &sk_list->fops); - if (pde == NULL) + if (!pde) return -ENOMEM; pde->data = sk_list; -- cgit v1.2.1 From fa1bd91809d58b3c183611556219fafd93c08625 Mon Sep 17 00:00:00 2001 From: Mikel Astiz Date: Thu, 9 Aug 2012 09:52:29 +0200 Subject: Bluetooth: Fix minor coding style in hci_event.c Replace the status checks with the short form of the boolean expression. Signed-off-by: Mikel Astiz Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 32e21ad36a68..bfa9bcc0f5ef 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -303,7 +303,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (status != 0) { + if (status) { mgmt_write_scan_failed(hdev, param, status); hdev->discov_timeout = 0; goto done; @@ -925,7 +925,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status); - if (rp->status != 0) + if (rp->status) goto unlock; cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY); @@ -1893,7 +1893,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) && (conn->type == ACL_LINK || conn->type == LE_LINK)) { - if (ev->status != 0) + if (ev->status) mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); else @@ -3262,7 +3262,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, * initiated the authentication. A traditional auth_complete * event gets always produced as initiator and is also mapped to * the mgmt_auth_failed event */ - if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status != 0) + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status) mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); -- cgit v1.2.1 From f0d6a0ea330617454032d6e2ed48759858a44427 Mon Sep 17 00:00:00 2001 From: Mikel Astiz Date: Thu, 9 Aug 2012 09:52:30 +0200 Subject: Bluetooth: mgmt: Add device disconnect reason MGMT_EV_DEVICE_DISCONNECTED will now expose the disconnection reason to userland, distinguishing four possible values: 0x00 Reason not known or unspecified 0x01 Connection timeout 0x02 Connection terminated by local host 0x03 Connection terminated by remote host Note that the local/remote distinction just determines which side terminated the low-level connection, regardless of the disconnection of the higher-level profiles. This can sometimes be misleading and thus must be used with care. For example, some hardware combinations would report a locally initiated disconnection even if the user turned Bluetooth off in the remote side. Signed-off-by: Mikel Astiz Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 26 +++++++++++++++++++++++--- net/bluetooth/mgmt.c | 9 +++++---- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index bfa9bcc0f5ef..48d730228c2f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -29,6 +29,7 @@ #include #include +#include /* Handle HCI Event packets */ @@ -1875,6 +1876,22 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } } +static u8 hci_to_mgmt_reason(u8 err) +{ + switch (err) { + case HCI_ERROR_CONNECTION_TIMEOUT: + return MGMT_DEV_DISCONN_TIMEOUT; + case HCI_ERROR_REMOTE_USER_TERM: + case HCI_ERROR_REMOTE_LOW_RESOURCES: + case HCI_ERROR_REMOTE_POWER_OFF: + return MGMT_DEV_DISCONN_REMOTE; + case HCI_ERROR_LOCAL_HOST_TERM: + return MGMT_DEV_DISCONN_LOCAL_HOST; + default: + return MGMT_DEV_DISCONN_UNKNOWN; + } +} + static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (void *) skb->data; @@ -1893,12 +1910,15 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) && (conn->type == ACL_LINK || conn->type == LE_LINK)) { - if (ev->status) + if (ev->status) { mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); - else + } else { + u8 reason = hci_to_mgmt_reason(ev->reason); + mgmt_device_disconnected(hdev, &conn->dst, conn->type, - conn->dst_type); + conn->dst_type, reason); + } } if (ev->status == 0) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a3329cbd3e4d..05d4b83a0189 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3077,16 +3077,17 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data) } int mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, - u8 link_type, u8 addr_type) + u8 link_type, u8 addr_type, u8 reason) { - struct mgmt_addr_info ev; + struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; int err; mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); - bacpy(&ev.bdaddr, bdaddr); - ev.type = link_to_bdaddr(link_type, addr_type); + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_bdaddr(link_type, addr_type); + ev.reason = reason; err = mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev), sk); -- cgit v1.2.1 From ab19516a50b375c11b9fa442954a43454a730950 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Fri, 27 Jul 2012 23:51:22 +0530 Subject: Bluetooth: debug: Correct the PSM printing Earlier we were printing chan->psm before assigning any value. Signed-off-by: Syam Sidhardhan Acked-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9f8b29ef5b68..dae895e3ca75 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1446,7 +1446,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, int err; BT_DBG("%s -> %s (type %u) psm 0x%2.2x", batostr(src), batostr(dst), - dst_type, __le16_to_cpu(chan->psm)); + dst_type, __le16_to_cpu(psm)); hdev = hci_get_route(dst, src); if (!hdev) -- cgit v1.2.1 From 144ad33020a0af66fbb188ef3f13ca91c5326a69 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Fri, 27 Jul 2012 23:51:21 +0530 Subject: Bluetooth: Use kref for l2cap channel reference counting This patch changes the struct l2cap_chan and associated code to use kref api for object refcounting and freeing. Suggested-by: Andrei Emeltchenko Signed-off-by: Jaganath Kanakkassery Signed-off-by: Syam Sidhardhan Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dae895e3ca75..9732f03cfbef 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -406,7 +406,7 @@ struct l2cap_chan *l2cap_chan_create(void) chan->state = BT_OPEN; - atomic_set(&chan->refcnt, 1); + kref_init(&chan->kref); /* This flag is cleared in l2cap_chan_ready() */ set_bit(CONF_NOT_COMPLETE, &chan->conf_state); @@ -416,8 +416,10 @@ struct l2cap_chan *l2cap_chan_create(void) return chan; } -static void l2cap_chan_destroy(struct l2cap_chan *chan) +static void l2cap_chan_destroy(struct kref *kref) { + struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref); + BT_DBG("chan %p", chan); write_lock(&chan_list_lock); @@ -429,17 +431,16 @@ static void l2cap_chan_destroy(struct l2cap_chan *chan) void l2cap_chan_hold(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); - atomic_inc(&c->refcnt); + kref_get(&c->kref); } void l2cap_chan_put(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); - if (atomic_dec_and_test(&c->refcnt)) - l2cap_chan_destroy(c); + kref_put(&c->kref, l2cap_chan_destroy); } void l2cap_chan_set_defaults(struct l2cap_chan *chan) -- cgit v1.2.1 From f91c8468df97d0ac18132eb38283524a74317901 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 17 Aug 2012 21:37:59 -0300 Subject: Bluetooth: Fix establishing ESCO links Commit 4cd2d98340b4f03d5532c30fdaeb451b035429cb "Bluetooth: Simplify the connection type handling" broke the creation of ESCO links. This patch adds a type parameter to hci_connect_sco() so it creates the connection of the right kind. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 98670b1df17b..3e65c021df50 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -521,8 +521,8 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return acl; } -static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, bdaddr_t *dst, - u8 sec_level, u8 auth_type) +static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 sec_level, u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -531,9 +531,9 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, bdaddr_t *dst, if (IS_ERR(acl)) return acl; - sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst); + sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { - sco = hci_conn_add(hdev, SCO_LINK, dst); + sco = hci_conn_add(hdev, type, dst); if (!sco) { hci_conn_put(acl); return ERR_PTR(-ENOMEM); @@ -574,7 +574,8 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, case ACL_LINK: return hci_connect_acl(hdev, dst, sec_level, auth_type); case SCO_LINK: - return hci_connect_sco(hdev, dst, sec_level, auth_type); + case ESCO_LINK: + return hci_connect_sco(hdev, type, dst, sec_level, auth_type); } return ERR_PTR(-EINVAL); -- cgit v1.2.1 From 06d7de831dab8b93adb86e039a2f3d36604a9197 Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Thu, 26 Jul 2012 09:51:08 +0800 Subject: Revert "rfkill: remove dead code" This reverts commit 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68. Those functions are needed and should not be removed, or there is no way to set the rfkill led trigger name. Signed-off-by: AceLan Kao Signed-off-by: Johannes Berg --- net/rfkill/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 752b72360ebc..c275bad12068 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -150,6 +150,20 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } +const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) +{ + return rfkill->led_trigger.name; +} +EXPORT_SYMBOL(rfkill_get_led_trigger_name); + +void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) +{ + BUG_ON(!rfkill); + + rfkill->ledtrigname = name; +} +EXPORT_SYMBOL(rfkill_set_led_trigger_name); + static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname -- cgit v1.2.1 From 203b42f7317494ae5e5efc7be6fb7f29c927f102 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Aug 2012 13:18:23 -0700 Subject: workqueue: make deferrable delayed_work initializer names consistent Initalizers for deferrable delayed_work are confused. * __DEFERRED_WORK_INITIALIZER() * DECLARE_DEFERRED_WORK() * INIT_DELAYED_WORK_DEFERRABLE() Rename them to * __DEFERRABLE_WORK_INITIALIZER() * DECLARE_DEFERRABLE_WORK() * INIT_DEFERRABLE_WORK() This patch doesn't cause any functional changes. Signed-off-by: Tejun Heo --- net/core/neighbour.c | 2 +- net/ipv4/inetpeer.c | 2 +- net/sunrpc/cache.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..112c6e2266e9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1545,7 +1545,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot allocate neighbour cache hashes"); rwlock_init(&tbl->lock); - INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); + INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd34..7b55c8648d4b 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -194,7 +194,7 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } static int addr_compare(const struct inetpeer_addr *a, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2afd2a84dc35..2a68bb3db772 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1635,7 +1635,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) void __init cache_initialize(void) { - INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); + INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean); } int cache_register_net(struct cache_detail *cd, struct net *net) -- cgit v1.2.1 From e7c2f967445dd2041f0f8e3179cca22bb8bb7f79 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Aug 2012 13:18:24 -0700 Subject: workqueue: use mod_delayed_work() instead of __cancel + queue Now that mod_delayed_work() is safe to call from IRQ handlers, __cancel_delayed_work() followed by queue_delayed_work() can be replaced with mod_delayed_work(). Most conversions are straight-forward except for the following. * net/core/link_watch.c: linkwatch_schedule_work() was doing a quite elaborate dancing around its delayed_work. Collapse it such that linkwatch_work is queued for immediate execution if LW_URGENT and existing timer is kept otherwise. Signed-off-by: Tejun Heo Cc: "David S. Miller" Cc: Tomi Valkeinen --- net/core/link_watch.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index c3519c6d1b16..8e397a69005a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -120,22 +120,13 @@ static void linkwatch_schedule_work(int urgent) delay = 0; /* - * This is true if we've scheduled it immeditately or if we don't - * need an immediate execution and it's already pending. + * If urgent, schedule immediate execution; otherwise, don't + * override the existing timer. */ - if (schedule_delayed_work(&linkwatch_work, delay) == !delay) - return; - - /* Don't bother if there is nothing urgent. */ - if (!test_bit(LW_URGENT, &linkwatch_flags)) - return; - - /* It's already running which is good enough. */ - if (!__cancel_delayed_work(&linkwatch_work)) - return; - - /* Otherwise we reschedule it again for immediate execution. */ - schedule_delayed_work(&linkwatch_work, 0); + if (test_bit(LW_URGENT, &linkwatch_flags)) + mod_delayed_work(system_wq, &linkwatch_work, 0); + else + schedule_delayed_work(&linkwatch_work, delay); } -- cgit v1.2.1 From 144d56e91044181ec0ef67aeca91e9a8b5718348 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Aug 2012 00:22:46 +0000 Subject: tcp: fix possible socket refcount problem Commit 6f458dfb40 (tcp: improve latencies of timer triggered events) added bug leading to following trace : [ 2866.131281] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.131726] [ 2866.132188] ========================= [ 2866.132281] [ BUG: held lock freed! ] [ 2866.132281] 3.6.0-rc1+ #622 Not tainted [ 2866.132281] ------------------------- [ 2866.132281] kworker/0:1/652 is freeing memory ffff880019ec0000-ffff880019ec0a1f, with a lock still held there! [ 2866.132281] (sk_lock-AF_INET-RPC){+.+...}, at: [] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] 4 locks held by kworker/0:1/652: [ 2866.132281] #0: (rpciod){.+.+.+}, at: [] process_one_work+0x1de/0x47f [ 2866.132281] #1: ((&task->u.tk_work)){+.+.+.}, at: [] process_one_work+0x1de/0x47f [ 2866.132281] #2: (sk_lock-AF_INET-RPC){+.+...}, at: [] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] #3: (&icsk->icsk_retransmit_timer){+.-...}, at: [] run_timer_softirq+0x1ad/0x35f [ 2866.132281] [ 2866.132281] stack backtrace: [ 2866.132281] Pid: 652, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #622 [ 2866.132281] Call Trace: [ 2866.132281] [] debug_check_no_locks_freed+0x112/0x159 [ 2866.132281] [] ? __sk_free+0xfd/0x114 [ 2866.132281] [] kmem_cache_free+0x6b/0x13a [ 2866.132281] [] __sk_free+0xfd/0x114 [ 2866.132281] [] sk_free+0x1c/0x1e [ 2866.132281] [] tcp_write_timer+0x51/0x56 [ 2866.132281] [] run_timer_softirq+0x218/0x35f [ 2866.132281] [] ? run_timer_softirq+0x1ad/0x35f [ 2866.132281] [] ? rb_commit+0x58/0x85 [ 2866.132281] [] ? tcp_write_timer_handler+0x148/0x148 [ 2866.132281] [] __do_softirq+0xcb/0x1f9 [ 2866.132281] [] ? _raw_spin_unlock+0x29/0x2e [ 2866.132281] [] call_softirq+0x1c/0x30 [ 2866.132281] [] do_softirq+0x4a/0xa6 [ 2866.132281] [] irq_exit+0x51/0xad [ 2866.132281] [] do_IRQ+0x9d/0xb4 [ 2866.132281] [] common_interrupt+0x6f/0x6f [ 2866.132281] [] ? sched_clock_cpu+0x58/0xd1 [ 2866.132281] [] ? _raw_spin_unlock_irqrestore+0x4c/0x56 [ 2866.132281] [] mod_timer+0x178/0x1a9 [ 2866.132281] [] sk_reset_timer+0x19/0x26 [ 2866.132281] [] tcp_rearm_rto+0x99/0xa4 [ 2866.132281] [] tcp_event_new_data_sent+0x6e/0x70 [ 2866.132281] [] tcp_write_xmit+0x7de/0x8e4 [ 2866.132281] [] ? __alloc_skb+0xa0/0x1a1 [ 2866.132281] [] __tcp_push_pending_frames+0x2e/0x8a [ 2866.132281] [] tcp_sendmsg+0xb32/0xcc6 [ 2866.132281] [] inet_sendmsg+0xaa/0xd5 [ 2866.132281] [] ? inet_autobind+0x5f/0x5f [ 2866.132281] [] ? trace_clock_local+0x9/0xb [ 2866.132281] [] sock_sendmsg+0xa3/0xc4 [ 2866.132281] [] ? rb_reserve_next_event+0x26f/0x2d5 [ 2866.132281] [] ? native_sched_clock+0x29/0x6f [ 2866.132281] [] ? sched_clock+0x9/0xd [ 2866.132281] [] ? trace_clock_local+0x9/0xb [ 2866.132281] [] kernel_sendmsg+0x37/0x43 [ 2866.132281] [] xs_send_kvec+0x77/0x80 [ 2866.132281] [] xs_sendpages+0x6f/0x1a0 [ 2866.132281] [] ? try_to_del_timer_sync+0x55/0x61 [ 2866.132281] [] xs_tcp_send_request+0x55/0xf1 [ 2866.132281] [] xprt_transmit+0x89/0x1db [ 2866.132281] [] ? call_connect+0x3c/0x3c [ 2866.132281] [] call_transmit+0x1c5/0x20e [ 2866.132281] [] __rpc_execute+0x6f/0x225 [ 2866.132281] [] ? call_connect+0x3c/0x3c [ 2866.132281] [] rpc_async_schedule+0x28/0x34 [ 2866.132281] [] process_one_work+0x24d/0x47f [ 2866.132281] [] ? process_one_work+0x1de/0x47f [ 2866.132281] [] ? __rpc_execute+0x225/0x225 [ 2866.132281] [] worker_thread+0x236/0x317 [ 2866.132281] [] ? process_scheduled_works+0x2f/0x2f [ 2866.132281] [] kthread+0x9a/0xa2 [ 2866.132281] [] kernel_thread_helper+0x4/0x10 [ 2866.132281] [] ? retint_restore_args+0x13/0x13 [ 2866.132281] [] ? __init_kthread_worker+0x5a/0x5a [ 2866.132281] [] ? gs_change+0x13/0x13 [ 2866.308506] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.309689] ============================================================================= [ 2866.310254] BUG TCP (Not tainted): Object already free [ 2866.310254] ----------------------------------------------------------------------------- [ 2866.310254] The bug comes from the fact that timer set in sk_reset_timer() can run before we actually do the sock_hold(). socket refcount reaches zero and we free the socket too soon. timer handler is not allowed to reduce socket refcnt if socket is owned by the user, or we need to change sk_reset_timer() implementation. We should take a reference on the socket in case TCP_DELACK_TIMER_DEFERRED or TCP_DELACK_TIMER_DEFERRED bit are set in tsq_flags Also fix a typo in tcp_delack_timer(), where TCP_WRITE_TIMER_DEFERRED was used instead of TCP_DELACK_TIMER_DEFERRED. For consistency, use same socket refcount change for TCP_MTU_REDUCED_DEFERRED, even if not fired from a timer. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 +++++--- net/ipv4/tcp_output.c | 14 +++++++++----- net/ipv4/tcp_timer.c | 6 ++++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5bf2040b25b1..00a748d14062 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ tp->mtu_info = info; - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + } else { + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + sock_hold(sk); + } goto out; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20dfd892c86f..d04632673a9e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); - - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { tcp_delack_timer_handler(sk); - - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { sk->sk_prot->mtu_reduced(sk); + __sock_put(sk); + } } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad55a38..b774a03bd1dc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); @@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.1 From 1a7b27c97ce675b42eeb7bfaf6e15c34f35c8f95 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 20 Aug 2012 02:52:09 +0000 Subject: ipv4: Use newinet->inet_opt in inet_csk_route_child_sock() Since 0e734419923bd ("ipv4: Use inet_csk_route_child_sock() in DCCP and TCP."), inet_csk_route_child_sock() is called instead of inet_csk_route_req(). However, after creating the child-sock in tcp/dccp_v4_syn_recv_sock(), ireq->opt is set to NULL, before calling inet_csk_route_child_sock(). Thus, inside inet_csk_route_child_sock() opt is always NULL and the SRR-options are not respected anymore. Packets sent by the server won't have the correct destination-IP. This patch fixes it by accessing newinet->inet_opt instead of ireq->opt inside inet_csk_route_child_sock(). Reported-by: Luca Boccassi Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17c00f7..7f75f21d7b83 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *newinet = inet_sk(newsk); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; + + rcu_read_lock(); + opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } -- cgit v1.2.1 From a9915a1b52df52ad87f3b33422da95cf25372f09 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Aug 2012 07:26:45 +0000 Subject: ipv4: fix ip header ident selection in __ip_make_skb() Christian Casteyde reported a kmemcheck 32-bit read from uninitialized memory in __ip_select_ident(). It turns out that __ip_make_skb() called ip_select_ident() before properly initializing iph->daddr. This is a bug uncovered by commit 1d861aa4b3fb (inet: Minimize use of cached route inetpeer.) Addresses https://bugzilla.kernel.org/show_bug.cgi?id=46131 Reported-by: Christian Casteyde Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 147ccc3e93db..c196d749daf2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); + ip_select_ident(iph, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; -- cgit v1.2.1 From e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Aug 2012 06:21:17 +0000 Subject: af_netlink: force credentials passing [CVE-2012-3520] Pablo Neira Ayuso discovered that avahi and potentially NetworkManager accept spoofed Netlink messages because of a kernel bug. The kernel passes all-zero SCM_CREDENTIALS ancillary data to the receiver if the sender did not provide such data, instead of not including any such data at all or including the correct data from the peer (as it is the case with AF_UNIX). This bug was introduced in commit 16e572626961 (af_unix: dont send SCM_CREDENTIALS by default) This patch forces passing credentials for netlink, as before the regression. Another fix would be to not add SCM_CREDENTIALS in netlink messages if not provided by the sender, but it might break some programs. With help from Florian Weimer & Petr Matousek This issue is designated as CVE-2012-3520 Signed-off-by: Eric Dumazet Cc: Petr Matousek Cc: Florian Weimer Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 2 +- net/unix/af_unix.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969da45b..1445d73533ed 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &scm; - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, true); if (err < 0) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e4768c180da2..c5ee4ff61364 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; -- cgit v1.2.1 From 6d4221b53707486dfad3f5bfe568d2ce7f4c9863 Mon Sep 17 00:00:00 2001 From: Jim Schutt Date: Fri, 10 Aug 2012 10:37:38 -0700 Subject: libceph: avoid truncation due to racing banners Because the Ceph client messenger uses a non-blocking connect, it is possible for the sending of the client banner to race with the arrival of the banner sent by the peer. When ceph_sock_state_change() notices the connect has completed, it schedules work to process the socket via con_work(). During this time the peer is writing its banner, and arrival of the peer banner races with con_work(). If con_work() calls try_read() before the peer banner arrives, there is nothing for it to do, after which con_work() calls try_write() to send the client's banner. In this case Ceph's protocol negotiation can complete succesfully. The server-side messenger immediately sends its banner and addresses after accepting a connect request, *before* actually attempting to read or verify the banner from the client. As a result, it is possible for the banner from the server to arrive before con_work() calls try_read(). If that happens, try_read() will read the banner and prepare protocol negotiation info via prepare_write_connect(). prepare_write_connect() calls con_out_kvec_reset(), which discards the as-yet-unsent client banner. Next, con_work() calls try_write(), which sends the protocol negotiation info rather than the banner that the peer is expecting. The result is that the peer sees an invalid banner, and the client reports "negotiation failed". Fix this by moving con_out_kvec_reset() out of prepare_write_connect() to its callers at all locations except the one where the banner might still need to be sent. [elder@inktak.com: added note about server-side behavior] Signed-off-by: Jim Schutt Reviewed-by: Alex Elder --- net/ceph/messenger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b9796750034a..24c5eea8c45b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -915,7 +915,6 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.authorizer_len = auth ? cpu_to_le32(auth->authorizer_buf_len) : 0; - con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); if (auth && auth->authorizer_buf_len) @@ -1557,6 +1556,7 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1577,6 +1577,7 @@ static int process_connect(struct ceph_connection *con) ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); reset_connection(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1601,6 +1602,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.connect_seq), le32_to_cpu(con->in_reply.connect_seq)); con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1617,6 +1619,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, le32_to_cpu(con->in_reply.global_seq)); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -2135,7 +2138,11 @@ more: BUG_ON(con->state != CON_STATE_CONNECTING); con->state = CON_STATE_NEGOTIATING; - /* Banner is good, exchange connection info */ + /* + * Received banner is good, exchange connection info. + * Do not reset out_kvec, as sending our banner raced + * with receiving peer banner after connect completed. + */ ret = prepare_write_connect(con); if (ret < 0) goto out; -- cgit v1.2.1 From 27f011243a6e4e8b81078df1d83608dae31e3d38 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 20 Aug 2012 11:28:25 -0700 Subject: mac80211: fix DS to MBSS address translation The destination address of unicast frames forwarded through a mesh gate was being replaced with the broadcast address. Instead leave the original destination address as the mesh DA. If the nexthop address is not in the mpath table it will be resolved. If that fails, the frame will be forwarded to known mesh gates. Reported-by: Cedric Voncken Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index acf712ffb5e6..c5e8c9c31f76 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1811,37 +1811,31 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, NULL, NULL); } else { - int is_mesh_mcast = 1; - const u8 *mesh_da; + /* DS -> MBSS (802.11-2012 13.11.3.3). + * For unicast with unknown forwarding information, + * destination might be in the MBSS or if that fails + * forwarded to another mesh gate. In either case + * resolution will be handled in ieee80211_xmit(), so + * leave the original DA. This also works for mcast */ + const u8 *mesh_da = skb->data; + + if (mppath) + mesh_da = mppath->mpp; + else if (mpath) + mesh_da = mpath->dst; + rcu_read_unlock(); - if (is_multicast_ether_addr(skb->data)) - /* DA TA mSA AE:SA */ - mesh_da = skb->data; - else { - static const u8 bcast[ETH_ALEN] = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - if (mppath) { - /* RA TA mDA mSA AE:DA SA */ - mesh_da = mppath->mpp; - is_mesh_mcast = 0; - } else if (mpath) { - mesh_da = mpath->dst; - is_mesh_mcast = 0; - } else { - /* DA TA mSA AE:SA */ - mesh_da = bcast; - } - } hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, mesh_da, sdata->vif.addr); - rcu_read_unlock(); - if (is_mesh_mcast) + if (is_multicast_ether_addr(mesh_da)) + /* DA TA mSA AE:SA */ meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, skb->data + ETH_ALEN, NULL); else + /* RA TA mDA mSA AE:DA SA */ meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, -- cgit v1.2.1 From aba4e6fff8de0c92e53f0e7ef077231e75f7d760 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 22 Aug 2012 14:21:07 +0530 Subject: mac80211: Fix AP mode regression Commit mac80211: avoid using synchronize_rcu in ieee80211_set_probe_resp changed the return value when the probe response template is not present. Revert to the earlier value of 1 - this fixes AP mode for drivers like ath9k. Signed-off-by: Sujith Manoharan Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 69b322f6ca2e..929f897a8ded 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -740,7 +740,7 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, struct probe_resp *new, *old; if (!resp || !resp_len) - return -EINVAL; + return 1; old = rtnl_dereference(sdata->u.ap.probe_resp); -- cgit v1.2.1 From 6705e86724f1ac83394592be7dbfc0c07ac25aa2 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 16 Aug 2012 18:33:39 +0000 Subject: netfilter: replace list_for_each_continue_rcu with new interface This patch replaces list_for_each_continue_rcu() with list_for_each_entry_continue_rcu() to allow removing list_for_each_continue_rcu(). Signed-off-by: Michael Wang Reviewed-by: Paul E. McKenney Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0bc6b60db4df..8f4b0b2b6f80 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -131,14 +131,13 @@ unsigned int nf_iterate(struct list_head *head, int hook_thresh) { unsigned int verdict; + struct nf_hook_ops *elem = list_entry_rcu(*i, struct nf_hook_ops, list); /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_continue_rcu(*i, head) { - struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; - + list_for_each_entry_continue_rcu(elem, head, list) { if (hook_thresh > elem->priority) continue; @@ -155,11 +154,14 @@ repeat: continue; } #endif - if (verdict != NF_REPEAT) + if (verdict != NF_REPEAT) { + *i = &elem->list; return verdict; + } goto repeat; } } + *i = &elem->list; return NF_ACCEPT; } -- cgit v1.2.1 From 90efbed18a30d78145419cdbd44f9ec152efeb16 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sun, 19 Aug 2012 15:11:32 +0000 Subject: netfilter: remove unnecessary goto statement for error recovery Usually it's a good practice to use goto statement for error recovery when initializing the module. This approach could be an overkill if: 1) there is only one fail case; 2) success and failure use the same return statement. For a cleaner approach, remove the unnecessary goto statement and directly implement error recovery. Signed-off-by: Jean Sacren Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/iptable_filter.c | 6 +----- net/ipv4/netfilter/iptable_mangle.c | 6 +----- net/ipv4/netfilter/iptable_raw.c | 6 +----- 3 files changed, 3 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index d20cc374025c..6b3da5cf54e9 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -94,14 +94,10 @@ static int __init iptable_filter_init(void) filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); if (IS_ERR(filter_ops)) { ret = PTR_ERR(filter_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_filter_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_filter_net_ops); - return ret; } static void __exit iptable_filter_fini(void) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index f38b94288cf5..85d88f206447 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -129,14 +129,10 @@ static int __init iptable_mangle_init(void) mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { ret = PTR_ERR(mangle_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_mangle_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_mangle_net_ops); - return ret; } static void __exit iptable_mangle_fini(void) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index b21e2191e2f5..03d9696d3c6e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -73,14 +73,10 @@ static int __init iptable_raw_init(void) rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); if (IS_ERR(rawtable_ops)) { ret = PTR_ERR(rawtable_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_raw_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_raw_net_ops); - return ret; } static void __exit iptable_raw_fini(void) -- cgit v1.2.1 From 46df7b814548849deee01f50bc75f8f5ae8cd767 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 22 Feb 2012 19:58:59 -0800 Subject: openvswitch: Add support for network namespaces. Following patch adds support for network namespace to openvswitch. Since it must release devices when namespaces are destroyed, a side effect of this patch is that the module no longer keeps a refcount but instead cleans up any state when it is unloaded. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 269 +++++++++++++++++++++-------------- net/openvswitch/datapath.h | 19 ++- net/openvswitch/dp_notify.c | 8 +- net/openvswitch/vport-internal_dev.c | 7 +- net/openvswitch/vport-netdev.c | 2 +- net/openvswitch/vport.c | 22 +-- net/openvswitch/vport.h | 3 +- 7 files changed, 207 insertions(+), 123 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8277d29e710..cad39fca75a9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -49,11 +49,28 @@ #include #include #include +#include +#include #include "datapath.h" #include "flow.h" #include "vport-internal_dev.h" +/** + * struct ovs_net - Per net-namespace data for ovs. + * @dps: List of datapaths to enable dumping them all out. + * Protected by genl_mutex. + */ +struct ovs_net { + struct list_head dps; +}; + +static int ovs_net_id __read_mostly; + +#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) +static void rehash_flow_table(struct work_struct *work); +static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); + /** * DOC: Locking: * @@ -71,29 +88,21 @@ * each other. */ -/* Global list of datapaths to enable dumping them all out. - * Protected by genl_mutex. - */ -static LIST_HEAD(dps); - -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) -static void rehash_flow_table(struct work_struct *work); -static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); - static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); -static int queue_userspace_packet(int dp_ifindex, struct sk_buff *, +static int queue_userspace_packet(struct net *, int dp_ifindex, + struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ -static struct datapath *get_dp(int dp_ifindex) +static struct datapath *get_dp(struct net *net, int dp_ifindex) { struct datapath *dp = NULL; struct net_device *dev; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, dp_ifindex); + dev = dev_get_by_index_rcu(net, dp_ifindex); if (dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); if (vport) @@ -135,6 +144,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); free_percpu(dp->stats_percpu); + release_net(ovs_dp_get_net(dp)); kfree(dp); } @@ -220,11 +230,12 @@ static struct genl_family dp_packet_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true }; int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, - const struct dp_upcall_info *upcall_info) + const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; int dp_ifindex; @@ -242,9 +253,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, } if (!skb_is_gso(skb)) - err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); else - err = queue_gso_packets(dp_ifindex, skb, upcall_info); + err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); if (err) goto err; @@ -260,7 +271,8 @@ err: return err; } -static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, +static int queue_gso_packets(struct net *net, int dp_ifindex, + struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -276,7 +288,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, /* Queue all of the segments. */ skb = segs; do { - err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); if (err) break; @@ -306,7 +318,8 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, return err; } -static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, +static int queue_userspace_packet(struct net *net, int dp_ifindex, + struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; @@ -362,7 +375,7 @@ static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->pid); out: kfree_skb(nskb); @@ -370,15 +383,10 @@ out: } /* Called with genl_mutex. */ -static int flush_flows(int dp_ifindex) +static int flush_flows(struct datapath *dp) { struct flow_table *old_table; struct flow_table *new_table; - struct datapath *dp; - - dp = get_dp(dp_ifindex); - if (!dp) - return -ENODEV; old_table = genl_dereference(dp->table); new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); @@ -668,7 +676,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->priority = flow->key.phy.priority; rcu_read_lock(); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; @@ -742,7 +750,8 @@ static struct genl_family dp_flow_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX + .maxattr = OVS_FLOW_ATTR_MAX, + .netnsok = true }; static struct genl_multicast_group ovs_dp_flow_multicast_group = { @@ -894,7 +903,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; } - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); error = -ENODEV; if (!dp) goto error; @@ -995,7 +1004,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; @@ -1023,7 +1032,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (err) return err; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; @@ -1052,16 +1061,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; int key_len; + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + if (!a[OVS_FLOW_ATTR_KEY]) - return flush_flows(ovs_header->dp_ifindex); + return flush_flows(dp); + err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); if (err) return err; - dp = get_dp(ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; - table = genl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) @@ -1090,7 +1100,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; struct flow_table *table; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; @@ -1152,7 +1162,8 @@ static struct genl_family dp_datapath_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX + .maxattr = OVS_DP_ATTR_MAX, + .netnsok = true }; static struct genl_multicast_group ovs_dp_datapath_multicast_group = { @@ -1210,18 +1221,19 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, } /* Called with genl_mutex and optionally with RTNL lock also. */ -static struct datapath *lookup_datapath(struct ovs_header *ovs_header, +static struct datapath *lookup_datapath(struct net *net, + struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) { struct datapath *dp; if (!a[OVS_DP_ATTR_NAME]) - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); else { struct vport *vport; rcu_read_lock(); - vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME])); + vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; rcu_read_unlock(); } @@ -1235,6 +1247,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; struct vport *vport; + struct ovs_net *ovs_net; int err; err = -EINVAL; @@ -1242,15 +1255,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err; rtnl_lock(); - err = -ENODEV; - if (!try_module_get(THIS_MODULE)) - goto err_unlock_rtnl; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_put_module; + goto err_unlock_rtnl; + INIT_LIST_HEAD(&dp->port_list); + ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ err = -ENOMEM; @@ -1287,7 +1299,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto err_destroy_local_port; - list_add_tail(&dp->list_node, &dps); + ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); + list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); genl_notify(reply, genl_info_net(info), info->snd_pid, @@ -1302,34 +1315,20 @@ err_destroy_percpu: err_destroy_table: ovs_flow_tbl_destroy(genl_dereference(dp->table)); err_free_dp: + release_net(ovs_dp_get_net(dp)); kfree(dp); -err_put_module: - module_put(THIS_MODULE); err_unlock_rtnl: rtnl_unlock(); err: return err; } -static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) +/* Called with genl_mutex. */ +static void __dp_destroy(struct datapath *dp) { struct vport *vport, *next_vport; - struct sk_buff *reply; - struct datapath *dp; - int err; rtnl_lock(); - dp = lookup_datapath(info->userhdr, info->attrs); - err = PTR_ERR(dp); - if (IS_ERR(dp)) - goto exit_unlock; - - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, - info->snd_seq, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) if (vport->port_no != OVSP_LOCAL) ovs_dp_detach_port(vport); @@ -1345,17 +1344,32 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); call_rcu(&dp->rcu, destroy_dp_rcu); - module_put(THIS_MODULE); +} + +static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct datapath *dp; + int err; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + err = PTR_ERR(dp); + if (IS_ERR(dp)) + return err; + + reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + info->snd_seq, OVS_DP_CMD_DEL); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + return err; + + __dp_destroy(dp); genl_notify(reply, genl_info_net(info), info->snd_pid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; - -exit_unlock: - rtnl_unlock(); - return err; } static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) @@ -1364,7 +1378,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - dp = lookup_datapath(info->userhdr, info->attrs); + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) return PTR_ERR(dp); @@ -1372,7 +1386,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_datapath_multicast_group.id, err); return 0; } @@ -1389,7 +1403,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; - dp = lookup_datapath(info->userhdr, info->attrs); + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) return PTR_ERR(dp); @@ -1403,11 +1417,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); struct datapath *dp; int skip = cb->args[0]; int i = 0; - list_for_each_entry(dp, &dps, list_node) { + list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1459,7 +1474,8 @@ static struct genl_family dp_vport_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true }; struct genl_multicast_group ovs_dp_vport_multicast_group = { @@ -1525,14 +1541,15 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, } /* Called with RTNL lock or RCU read lock. */ -static struct vport *lookup_vport(struct ovs_header *ovs_header, +static struct vport *lookup_vport(struct net *net, + struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) { struct datapath *dp; struct vport *vport; if (a[OVS_VPORT_ATTR_NAME]) { - vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME])); + vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) return ERR_PTR(-ENODEV); if (ovs_header->dp_ifindex && @@ -1545,7 +1562,7 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header, if (port_no >= DP_MAX_PORTS) return ERR_PTR(-EFBIG); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (!dp) return ERR_PTR(-ENODEV); @@ -1574,7 +1591,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) goto exit; rtnl_lock(); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto exit_unlock; @@ -1638,7 +1655,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) int err; rtnl_lock(); - vport = lookup_vport(info->userhdr, a); + vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; @@ -1658,7 +1675,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); goto exit_unlock; } @@ -1679,7 +1696,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; rtnl_lock(); - vport = lookup_vport(info->userhdr, a); + vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; @@ -1714,7 +1731,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) int err; rcu_read_lock(); - vport = lookup_vport(ovs_header, a); + vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; @@ -1741,7 +1758,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 port_no; int retval; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; @@ -1766,28 +1783,6 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return retval; } -static void rehash_flow_table(struct work_struct *work) -{ - struct datapath *dp; - - genl_lock(); - - list_for_each_entry(dp, &dps, list_node) { - struct flow_table *old_table = genl_dereference(dp->table); - struct flow_table *new_table; - - new_table = ovs_flow_tbl_rehash(old_table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_deferred_destroy(old_table); - } - } - - genl_unlock(); - - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); -} - static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1872,6 +1867,59 @@ error: return err; } +static void rehash_flow_table(struct work_struct *work) +{ + struct datapath *dp; + struct net *net; + + genl_lock(); + rtnl_lock(); + for_each_net(net) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + struct flow_table *old_table = genl_dereference(dp->table); + struct flow_table *new_table; + + new_table = ovs_flow_tbl_rehash(old_table); + if (!IS_ERR(new_table)) { + rcu_assign_pointer(dp->table, new_table); + ovs_flow_tbl_deferred_destroy(old_table); + } + } + } + rtnl_unlock(); + genl_unlock(); + + schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); +} + +static int __net_init ovs_init_net(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + INIT_LIST_HEAD(&ovs_net->dps); + return 0; +} + +static void __net_exit ovs_exit_net(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp, *dp_next; + + genl_lock(); + list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) + __dp_destroy(dp); + genl_unlock(); +} + +static struct pernet_operations ovs_net_ops = { + .init = ovs_init_net, + .exit = ovs_exit_net, + .id = &ovs_net_id, + .size = sizeof(struct ovs_net), +}; + static int __init dp_init(void) { struct sk_buff *dummy_skb; @@ -1889,10 +1937,14 @@ static int __init dp_init(void) if (err) goto error_flow_exit; - err = register_netdevice_notifier(&ovs_dp_device_notifier); + err = register_pernet_device(&ovs_net_ops); if (err) goto error_vport_exit; + err = register_netdevice_notifier(&ovs_dp_device_notifier); + if (err) + goto error_netns_exit; + err = dp_register_genl(); if (err < 0) goto error_unreg_notifier; @@ -1903,6 +1955,8 @@ static int __init dp_init(void) error_unreg_notifier: unregister_netdevice_notifier(&ovs_dp_device_notifier); +error_netns_exit: + unregister_pernet_device(&ovs_net_ops); error_vport_exit: ovs_vport_exit(); error_flow_exit: @@ -1914,9 +1968,10 @@ error: static void dp_cleanup(void) { cancel_delayed_work_sync(&rehash_flow_wq); - rcu_barrier(); dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); unregister_netdevice_notifier(&ovs_dp_device_notifier); + unregister_pernet_device(&ovs_net_ops); + rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index c1105c147531..771c11e13e34 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,8 +27,7 @@ #include #include "flow.h" - -struct vport; +#include "vport.h" #define DP_MAX_PORTS 1024 #define SAMPLE_ACTION_DEPTH 3 @@ -63,6 +62,7 @@ struct dp_stats_percpu { * @port_list: List of all ports in @ports in arbitrary order. RTNL required * to iterate or modify. * @stats_percpu: Per-CPU datapath statistics. + * @net: Reference to net namespace. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -80,6 +80,11 @@ struct datapath { /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; + +#ifdef CONFIG_NET_NS + /* Network namespace ref. */ + struct net *net; +#endif }; /** @@ -108,6 +113,16 @@ struct dp_upcall_info { u32 pid; }; +static inline struct net *ovs_dp_get_net(struct datapath *dp) +{ + return read_pnet(&dp->net); +} + +static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) +{ + write_pnet(&dp->net, net); +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_multicast_group ovs_dp_vport_multicast_group; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 36dcee8fc84a..5558350e0d33 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -41,19 +41,21 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UNREGISTER: if (!ovs_is_internal_dev(dev)) { struct sk_buff *notify; + struct datapath *dp = vport->dp; notify = ovs_vport_cmd_build_info(vport, 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, ovs_dp_vport_multicast_group.id, PTR_ERR(notify)); break; } - genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id, - GFP_KERNEL); + genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, + ovs_dp_vport_multicast_group.id, + GFP_KERNEL); } break; } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 4061b9ee07f7..5d460c37df07 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -144,7 +144,7 @@ static void do_setup(struct net_device *netdev) netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_TX; @@ -175,9 +175,14 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) goto error_free_vport; } + dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(netdev_vport->dev); internal_dev->vport = vport; + /* Restrict bridge port to current netns. */ + if (vport->port_no == OVSP_LOCAL) + netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; + err = register_netdevice(netdev_vport->dev); if (err) goto error_free_netdev; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6ea3551cc78c..3c1e58ba714b 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -83,7 +83,7 @@ static struct vport *netdev_create(const struct vport_parms *parms) netdev_vport = netdev_vport_priv(vport); - netdev_vport->dev = dev_get_by_name(&init_net, parms->name); + netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); if (!netdev_vport->dev) { err = -ENODEV; goto error_free_vport; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6140336e79d7..9873acea9785 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -16,10 +16,10 @@ * 02110-1301, USA */ -#include #include #include #include +#include #include #include #include @@ -27,7 +27,9 @@ #include #include #include +#include +#include "datapath.h" #include "vport.h" #include "vport-internal_dev.h" @@ -67,9 +69,9 @@ void ovs_vport_exit(void) kfree(dev_table); } -static struct hlist_head *hash_bucket(const char *name) +static struct hlist_head *hash_bucket(struct net *net, const char *name) { - unsigned int hash = full_name_hash(name, strlen(name)); + unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } @@ -80,14 +82,15 @@ static struct hlist_head *hash_bucket(const char *name) * * Must be called with RTNL or RCU read lock. */ -struct vport *ovs_vport_locate(const char *name) +struct vport *ovs_vport_locate(struct net *net, const char *name) { - struct hlist_head *bucket = hash_bucket(name); + struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; struct hlist_node *node; hlist_for_each_entry_rcu(vport, node, bucket, hash_node) - if (!strcmp(name, vport->ops->get_name(vport))) + if (!strcmp(name, vport->ops->get_name(vport)) && + net_eq(ovs_dp_get_net(vport->dp), net)) return vport; return NULL; @@ -170,14 +173,17 @@ struct vport *ovs_vport_add(const struct vport_parms *parms) for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { if (vport_ops_list[i]->type == parms->type) { + struct hlist_head *bucket; + vport = vport_ops_list[i]->create(parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); goto out; } - hlist_add_head_rcu(&vport->hash_node, - hash_bucket(vport->ops->get_name(vport))); + bucket = hash_bucket(ovs_dp_get_net(vport->dp), + vport->ops->get_name(vport)); + hlist_add_head_rcu(&vport->hash_node, bucket); return vport; } } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index aac680ca2b06..97cef08d981b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -20,6 +20,7 @@ #define VPORT_H 1 #include +#include #include #include #include @@ -38,7 +39,7 @@ void ovs_vport_exit(void); struct vport *ovs_vport_add(const struct vport_parms *); void ovs_vport_del(struct vport *); -struct vport *ovs_vport_locate(const char *name); +struct vport *ovs_vport_locate(struct net *net, const char *name); void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); -- cgit v1.2.1 From 9b04f350057863d1fad1ba071e09362a1da3503e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Aug 2012 20:48:29 +0000 Subject: ipv4: properly update pmtu Sylvain Munault reported following info : - TCP connection get "stuck" with data in send queue when doing "large" transfers ( like typing 'ps ax' on a ssh connection ) - Only happens on path where the PMTU is lower than the MTU of the interface - Is not present right after boot, it only appears 10-20min after boot or so. (and that's inside the _same_ TCP connection, it works fine at first and then in the same ssh session, it'll get stuck) - Definitely seems related to fragments somehow since I see a router sending ICMP message saying fragmentation is needed. - Exact same setup works fine with kernel 3.5.1 Problem happens when the 10 minutes (ip_rt_mtu_expires) expiration period is over. ip_rt_update_pmtu() calls dst_set_expires() to rearm a new expiration, but dst_set_expires() does nothing because dst.expires is already set. It seems we want to set the expires field to a new value, regardless of prior one. With help from Julian Anastasov. Reported-by: Sylvain Munaut Signed-off-by: Eric Dumazet CC: Julian Anastasov Tested-by: Sylvain Munaut Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fd9ecb52c66b..8c8c748ebb28 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -956,7 +956,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, dst->obsolete = DST_OBSOLETE_KILL; } else { rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); } } -- cgit v1.2.1 From 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2012 17:19:46 +0000 Subject: net: remove delay at device dismantle I noticed extra one second delay in device dismantle, tracked down to a call to dst_dev_event() while some call_rcu() are still in RCU queues. These call_rcu() were posted by rt_free(struct rtable *rt) calls. We then wait a little (but one second) in netdev_wait_allrefs() before kicking again NETDEV_UNREGISTER. As the call_rcu() are now completed, dst_dev_event() can do the needed device swap on busy dst. To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called after a rcu_barrier(), but outside of RTNL lock. Use NETDEV_UNREGISTER_FINAL with care ! Change dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL Also remove NETDEV_UNREGISTER_BATCH, as its not used anymore after IP cache removal. With help from Gao feng Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Cc: "Eric W. Biederman" Cc: Gao feng Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++-------------- net/core/dst.c | 2 +- net/core/fib_rules.c | 3 ++- net/core/rtnetlink.c | 2 +- net/ipv4/devinet.c | 6 +++++- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv6/addrconf.c | 6 +++++- 7 files changed, 26 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 088923fe4066..0640d2a859c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,7 +1406,6 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb) nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } unlock: @@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); + if (val != NETDEV_UNREGISTER_FINAL) + ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head) netdev_unregister_kobject(dev); } - /* Process any work delayed until the end of the batch */ - dev = list_first_entry(head, struct net_device, unreg_list); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); list_for_each_entry(dev, head, unreg_list) @@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users - * should have already handle it the first time */ - + rcu_barrier(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { /* We must not have linkwatch events @@ -5851,9 +5845,8 @@ void netdev_run_todo(void) __rtnl_unlock(); - /* Wait for rcu callbacks to finish before attempting to drain - * the device list. This usually avoids a 250ms wait. - */ + + /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); @@ -5862,6 +5855,8 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", dev->name, dev->reg_state); @@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char the device is just moving and can keep their slaves up. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* diff --git a/net/core/dst.c b/net/core/dst.c index 56d63612e1e4..f6593d238e9a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, struct dst_entry *dst, *last = NULL; switch (event) { - case NETDEV_UNREGISTER: + case NETDEV_UNREGISTER_FINAL: case NETDEV_DOWN: mutex_lock(&dst_gc_mutex); for (dst = dst_busy_list; dst; dst = dst->next) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..585093755c23 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net *net = dev_net(dev); struct fib_rules_ops *ops; - ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: + ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: + ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34d975b0f277..c64efcff8078 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_PRE_TYPE_CHANGE: case NETDEV_GOING_DOWN: case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_BATCH: + case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index adf273f8ad2e..6a5e6e4b142c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev; + if (event == NETDEV_UNREGISTER_FINAL) + goto out; + + in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7f073a38c87d..fd7d9ae64f16 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev; struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { @@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } - if (!in_dev) + if (event == NETDEV_UNREGISTER_FINAL) return NOTIFY_DONE; + in_dev = __in_dev_get_rtnl(dev); + switch (event) { case NETDEV_UP: for_ifa(in_dev) { @@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo case NETDEV_CHANGE: rt_cache_flush(dev_net(dev), 0); break; - case NETDEV_UNREGISTER_BATCH: - break; } return NOTIFY_DONE; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..e581009cb09e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev; int run_pending = 0; int err; + if (event == NETDEV_UNREGISTER_FINAL) + return NOTIFY_DONE; + + idev = __in6_dev_get(dev); switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { -- cgit v1.2.1 From ef8531b64c3e2443da52e9f05d74a988230eedc5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Aug 2012 12:31:48 +0200 Subject: xfrm: fix RCU bugs This patch reverts commit 56892261ed1a (xfrm: Use rcu_dereference_bh to deference pointer protected by rcu_read_lock_bh), and fixes bugs introduced in commit 418a99ac6ad ( Replace rwlock on xfrm_policy_afinfo with rcu ) 1) We properly use RCU variant in this file, not a mix of RCU/RCU_BH 2) We must defer some writes after the synchronize_rcu() call or a reader can crash dereferencing NULL pointer. 3) Now we use the xfrm_policy_afinfo_lock spinlock only from process context, we no longer need to block BH in xfrm_policy_register_afinfo() and xfrm_policy_unregister_afinfo() 4) Can use RCU_INIT_POINTER() instead of rcu_assign_pointer() in xfrm_policy_unregister_afinfo() 5) Remove a forward inline declaration (xfrm_policy_put_afinfo()), and also move xfrm_policy_get_afinfo() declaration. Signed-off-by: Eric Dumazet Cc: Fan Du Cc: Priyanka Jain Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 76 ++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2ed698c190b5..741a32aa512e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -48,8 +48,6 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); @@ -96,6 +94,24 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl return false; } +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + rcu_read_unlock(); +} + static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr, @@ -2421,7 +2437,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -2444,7 +2460,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) afinfo->garbage_collect = xfrm_garbage_collect_deferred; rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); } - spin_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock(&xfrm_policy_afinfo_lock); rtnl_lock(); for_each_net(net) { @@ -2477,23 +2493,26 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], - NULL); - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } + else + RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], + NULL); + } + spin_unlock(&xfrm_policy_afinfo_lock); + if (!err) { + struct dst_ops *dst_ops = afinfo->dst_ops; + + synchronize_rcu(); + + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + afinfo->garbage_collect = NULL; } - spin_unlock_bh(&xfrm_policy_afinfo_lock); - synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -2502,32 +2521,15 @@ static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - rcu_read_lock_bh(); - afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET]); + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; #if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET6]); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; #endif - rcu_read_unlock_bh(); -} - -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - rcu_read_lock(); - afinfo = rcu_dereference(xfrm_policy_afinfo[family]); - if (unlikely(!afinfo)) - rcu_read_unlock(); - return afinfo; -} - -static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ rcu_read_unlock(); } -- cgit v1.2.1 From b87fb39e399137257a6db3224ea854117e9486e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Aug 2012 03:47:30 +0000 Subject: ipv6: gre: fix ip6gre_err() ip6gre_err() miscomputes grehlen (sizeof(ipv6h) is 4 or 8, not 40 as expected), and should take into account 'offset' parameter. Also uses pskb_may_pull() to cope with some fragged skbs Signed-off-by: Eric Dumazet Cc: Dmitry Kozlov Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a84ad5dc4fcf..424d11a4e7ff 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -415,8 +415,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(ipv6h + 1); - int grehlen = sizeof(ipv6h) + 4; + __be16 *p = (__be16 *)(skb->data + offset); + int grehlen = offset + 4; struct ip6_tnl *t; __be16 flags; @@ -432,8 +432,10 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) + if (!pskb_may_pull(skb, grehlen)) return; + ipv6h = (const struct ipv6hdr *)skb->data; + p = (__be16 *)(skb->data + offset); rcu_read_lock(); -- cgit v1.2.1 From bfdc587c5af4ff155cf702b972e8fcd66d77d5f2 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 19 Aug 2012 21:44:08 +0000 Subject: rds: Don't disable BH on BH context Since we have already in BH context when *_write_space(), *_data_ready() as well as *_state_change() are called, it's unnecessary to disable BH. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/rds/tcp_connect.c | 4 ++-- net/rds/tcp_listen.c | 4 ++-- net/rds/tcp_recv.c | 4 ++-- net/rds/tcp_send.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index af95c8e058fc..a65ee78db0c5 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 72981375f47c..7787537e9c2e 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) rdsdebug("listen data ready sk %p\n", sk); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 6243258f840f..4fac4f2bb9dc 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -322,7 +322,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) rdsdebug("data ready sk %p bytes %d\n", sk, bytes); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -336,7 +336,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 1b4fd68f0c7c..81cf5a4c5e40 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -174,7 +174,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -194,7 +194,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if -- cgit v1.2.1 From 9e67030af367ab524d0856af9e992de241eca3c7 Mon Sep 17 00:00:00 2001 From: "danborkmann@iogearbox.net" Date: Mon, 20 Aug 2012 03:34:03 +0000 Subject: af_packet: use define instead of constant Instead of using a hard-coded value for the status variable, it would make the code more readable to use its destined define from linux/if_packet.h. Signed-off-by: daniel.borkmann@tik.ee.ethz.ch Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fe0912f161ce..bfaa0a83f0eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -855,7 +855,8 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); ppd->tp_status = TP_STATUS_VLAN_VALID; } else { - ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + ppd->hv1.tp_vlan_tci = 0; + ppd->tp_status = TP_STATUS_AVAILABLE; } } @@ -1951,7 +1952,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int tp_len, size_max; unsigned char *addr; int len_sum = 0; - int status = 0; + int status = TP_STATUS_AVAILABLE; int hlen, tlen; mutex_lock(&po->pg_vec_lock); -- cgit v1.2.1 From 0fa7fa98dbcc2789409ed24e885485e645803d7f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Aug 2012 01:06:47 +0000 Subject: packet: Protect packet sk list with mutex (v2) Change since v1: * Fixed inuse counters access spotted by Eric In patch eea68e2f (packet: Report socket mclist info via diag module) I've introduced a "scheduling in atomic" problem in packet diag module -- the socket list is traversed under rcu_read_lock() while performed under it sk mclist access requires rtnl lock (i.e. -- mutex) to be taken. [152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002 [152363.820573] 4 locks held by crtools/12517: [152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [] sock_diag_rcv+0x1f/0x3e [152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [] sock_diag_rcv_msg+0xdb/0x11a [152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [] netlink_dump+0x23/0x1ab [152363.820693] #3: (rcu_read_lock){.+.+..}, at: [] packet_diag_dump+0x0/0x1af Similar thing was then re-introduced by further packet diag patches (fanount mutex and pgvec mutex for rings) :( Apart from being terribly sorry for the above, I propose to change the packet sk list protection from spinlock to mutex. This lock currently protects two modifications: * sklist * prot inuse counters The sklist modifications can be just reprotected with mutex since they already occur in a sleeping context. The inuse counters modifications are trickier -- the __this_cpu_-s are used inside, thus requiring the caller to handle the potential issues with contexts himself. Since packet sockets' counters are modified in two places only (packet_create and packet_release) we only need to protect the context from being preempted. BH disabling is not required in this case. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 16 +++++++++++----- net/packet/diag.c | 6 +++--- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bfaa0a83f0eb..f220c5bdb71f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); @@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, register_prot_hook(sk); } - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); return 0; out: @@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - spin_lock_init(&net->packet.sklist_lock); + mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) diff --git a/net/packet/diag.c b/net/packet/diag.c index bc33fbe8a5ef..39bce0d50df9 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); - rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + mutex_lock(&net->packet.sklist_lock); + sk_for_each(sk, node, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -192,7 +192,7 @@ next: num++; } done: - rcu_read_unlock(); + mutex_unlock(&net->packet.sklist_lock); cb->args[0] = num; return skb->len; -- cgit v1.2.1 From c70437289c989d865633486fc24c61441ba32b9e Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 22 Aug 2012 00:42:40 +0200 Subject: batman-adv: move function arguments on one line Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 563cfbf94a7f..3f0dfa2f6d7c 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -41,8 +41,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, - bool is_bcast) + struct sk_buff *skb, short vid, bool is_bcast) { return 0; } -- cgit v1.2.1 From 536a23f119e35e58c762a219bafd398ba2ed7980 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 18 Jun 2012 18:39:26 +0200 Subject: batman-adv: Add the backbone gateway list to debugfs This is especially useful if there are no claims yet, but we still want to know which gateways are using bridge loop avoidance in the network. Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 65 ++++++++++++++++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 8 +++++ net/batman-adv/debugfs.c | 12 +++++++ 3 files changed, 85 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6705d35b17ce..7e60466f5718 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1616,3 +1616,68 @@ out: batadv_hardif_free_ref(primary_if); return ret; } + +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_backbone_gw *backbone_gw; + struct batadv_hard_iface *primary_if; + struct hlist_node *node; + struct hlist_head *head; + int secs, msecs; + uint32_t i; + bool is_own; + int ret = 0; + uint8_t *primary_addr; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - please specify interfaces to enable it\n", + net_dev->name); + goto out; + } + + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - primary interface not active\n", + net_dev->name); + goto out; + } + + primary_addr = primary_if->net_dev->dev_addr; + seq_printf(seq, + "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + net_dev->name, primary_addr, + ntohs(bat_priv->claim_dest.group)); + seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + "Originator", "VID", "last seen", "CRC"); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + msecs = jiffies_to_msecs(jiffies - + backbone_gw->lasttime); + secs = msecs / 1000; + msecs = msecs % 1000; + + is_own = batadv_compare_eth(backbone_gw->orig, + primary_addr); + if (is_own) + continue; + + seq_printf(seq, + " * %pM on % 5d % 4i.%03is (%04x)\n", + backbone_gw->orig, backbone_gw->vid, + secs, msecs, backbone_gw->crc); + } + rcu_read_unlock(); + } +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 3f0dfa2f6d7c..789cb73bde67 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -27,6 +27,8 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset); int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig); int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet, @@ -65,6 +67,12 @@ static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, return 0; } +static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 34fbb1667bcd..391d4fb2026f 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -267,6 +267,15 @@ static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) return single_open(file, batadv_bla_claim_table_seq_print_text, net_dev); } + +static int batadv_bla_backbone_table_open(struct inode *inode, + struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_backbone_table_seq_print_text, + net_dev); +} + #endif static int batadv_transtable_local_open(struct inode *inode, struct file *file) @@ -305,6 +314,8 @@ static BATADV_DEBUGINFO(transtable_global, S_IRUGO, batadv_transtable_global_open); #ifdef CONFIG_BATMAN_ADV_BLA static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); +static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, + batadv_bla_backbone_table_open); #endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); @@ -316,6 +327,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_transtable_global, #ifdef CONFIG_BATMAN_ADV_BLA &batadv_debuginfo_bla_claim_table, + &batadv_debuginfo_bla_backbone_table, #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, -- cgit v1.2.1 From 99e966fc969360bed8d3bb71c144fa10ba7a12d0 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 23 Jun 2012 12:34:17 +0200 Subject: batman-adv: correct comments in bridge loop avoidance Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 7e60466f5718..5f1ad80d5163 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -281,7 +281,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, NULL, /* Ethernet SRC/HW SRC: originator mac */ primary_if->net_dev->dev_addr, - /* HW DST: FF:43:05:XX:00:00 + /* HW DST: FF:43:05:XX:YY:YY * with XX = claim type * and YY:YY = group id */ @@ -323,7 +323,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame - * set HW SRC to the special mac containg the crc + * set HW SRC and header destination to the receiving backbone + * gws mac */ memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); -- cgit v1.2.1 From 3eb8773e3a24d88ca528993af3756af70f307a82 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 23 Jun 2012 12:34:18 +0200 Subject: batman-adv: rename bridge loop avoidance claim types for consistency reasons within the code and with the documentation, we should always call it "claim" and "unclaim". Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 18 +++++++++--------- net/batman-adv/packet.h | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5f1ad80d5163..84dd8415ab6a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -295,7 +295,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, /* now we pretend that the client would have sent this ... */ switch (claimtype) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: /* normal claim frame * set Ethernet SRC to the clients mac */ @@ -303,7 +303,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); break; - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame * set HW SRC to the clients mac */ @@ -468,7 +468,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, continue; batadv_bla_send_claim(bat_priv, claim->addr, claim->vid, - BATADV_CLAIM_TYPE_ADD); + BATADV_CLAIM_TYPE_CLAIM); } rcu_read_unlock(); } @@ -703,7 +703,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, if (primary_if && batadv_compare_eth(backbone_addr, primary_if->net_dev->dev_addr)) batadv_bla_send_claim(bat_priv, claim_addr, vid, - BATADV_CLAIM_TYPE_DEL); + BATADV_CLAIM_TYPE_UNCLAIM); backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid); @@ -739,7 +739,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw); if (batadv_compare_eth(backbone_addr, primary_if->net_dev->dev_addr)) batadv_bla_send_claim(bat_priv, claim_addr, vid, - BATADV_CLAIM_TYPE_ADD); + BATADV_CLAIM_TYPE_CLAIM); /* TODO: we could call something like tt_local_del() here. */ @@ -784,12 +784,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, * otherwise assume it is in the hw_src */ switch (bla_dst->type) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: backbone_addr = hw_src; break; case BATADV_CLAIM_TYPE_REQUEST: case BATADV_CLAIM_TYPE_ANNOUNCE: - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: backbone_addr = ethhdr->h_source; break; default: @@ -905,12 +905,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, /* check for the different types of claim frames ... */ switch (bla_dst->type) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: if (batadv_handle_claim(bat_priv, primary_if, hw_src, ethhdr->h_source, vid)) return 1; break; - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: if (batadv_handle_unclaim(bat_priv, primary_if, ethhdr->h_source, hw_src, vid)) return 1; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 8d3e55a96adc..65d66e4ee757 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -92,8 +92,8 @@ enum batadv_tt_client_flags { /* claim frame types for the bridge loop avoidance */ enum batadv_bla_claimframe { - BATADV_CLAIM_TYPE_ADD = 0x00, - BATADV_CLAIM_TYPE_DEL = 0x01, + BATADV_CLAIM_TYPE_CLAIM = 0x00, + BATADV_CLAIM_TYPE_UNCLAIM = 0x01, BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, BATADV_CLAIM_TYPE_REQUEST = 0x03, }; -- cgit v1.2.1 From 1c9b0550f4813c4931b4e142c80f5c89be9489ec Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sat, 23 Jun 2012 11:47:53 +0200 Subject: batman-adv: convert remaining packet counters to per_cpu_ptr() infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Lindner Acked-by: Martin Hundebøll Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 5 ++-- net/batman-adv/soft-interface.c | 51 ++++++++++++++++++++++------------ net/batman-adv/types.h | 5 ++++ 3 files changed, 42 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 84dd8415ab6a..c9732acd59ff 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -340,8 +340,9 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + batadv_inc_counter(bat_priv, BATADV_CNT_RX); + batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, + skb->len + ETH_HLEN); soft_iface->last_rx = jiffies; netif_rx(skb); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2aae96c..2b3842b99096 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -93,7 +93,14 @@ static int batadv_interface_release(struct net_device *dev) static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); - return &bat_priv->stats; + struct net_device_stats *stats = &bat_priv->stats; + + stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); + stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); + stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED); + stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX); + stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES); + return stats; } static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) @@ -246,14 +253,14 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped_freed; } - bat_priv->stats.tx_packets++; - bat_priv->stats.tx_bytes += data_len; + batadv_inc_counter(bat_priv, BATADV_CNT_TX); + batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len); goto end; dropped: kfree_skb(skb); dropped_freed: - bat_priv->stats.tx_dropped++; + batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: if (primary_if) batadv_hardif_free_ref(primary_if); @@ -308,8 +315,9 @@ void batadv_interface_rx(struct net_device *soft_iface, /* skb->ip_summed = CHECKSUM_UNNECESSARY; */ - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + batadv_inc_counter(bat_priv, BATADV_CNT_RX); + batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, + skb->len + ETH_HLEN); soft_iface->last_rx = jiffies; @@ -379,15 +387,22 @@ struct net_device *batadv_softif_create(const char *name) if (!soft_iface) goto out; + bat_priv = netdev_priv(soft_iface); + + /* batadv_interface_stats() needs to be available as soon as + * register_netdevice() has been called + */ + bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); + if (!bat_priv->bat_counters) + goto free_soft_iface; + ret = register_netdevice(soft_iface); if (ret < 0) { pr_err("Unable to register the batman interface '%s': %i\n", name, ret); - goto free_soft_iface; + goto free_bat_counters; } - bat_priv = netdev_priv(soft_iface); - atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); atomic_set(&bat_priv->bridge_loop_avoidance, 0); @@ -417,17 +432,13 @@ struct net_device *batadv_softif_create(const char *name) bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; - bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); - if (!bat_priv->bat_counters) - goto unreg_soft_iface; - ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) - goto free_bat_counters; + goto unreg_soft_iface; ret = batadv_sysfs_add_meshif(soft_iface); if (ret < 0) - goto free_bat_counters; + goto unreg_soft_iface; ret = batadv_debugfs_add_meshif(soft_iface); if (ret < 0) @@ -443,12 +454,13 @@ unreg_debugfs: batadv_debugfs_del_meshif(soft_iface); unreg_sysfs: batadv_sysfs_del_meshif(soft_iface); -free_bat_counters: - free_percpu(bat_priv->bat_counters); unreg_soft_iface: + free_percpu(bat_priv->bat_counters); unregister_netdevice(soft_iface); return NULL; +free_bat_counters: + free_percpu(bat_priv->bat_counters); free_soft_iface: free_netdev(soft_iface); out: @@ -518,6 +530,11 @@ static u32 batadv_get_link(struct net_device *dev) static const struct { const char name[ETH_GSTRING_LEN]; } batadv_counters_strings[] = { + { "tx" }, + { "tx_bytes" }, + { "tx_dropped" }, + { "rx" }, + { "rx_bytes" }, { "forward" }, { "forward_bytes" }, { "mgmt_tx" }, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 12635fd2c3d3..7812b6edda13 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -145,6 +145,11 @@ struct batadv_bcast_duplist_entry { #endif enum batadv_counters { + BATADV_CNT_TX, + BATADV_CNT_TX_BYTES, + BATADV_CNT_TX_DROPPED, + BATADV_CNT_RX, + BATADV_CNT_RX_BYTES, BATADV_CNT_FORWARD, BATADV_CNT_FORWARD_BYTES, BATADV_CNT_MGMT_TX, -- cgit v1.2.1 From 3a7f291bf6d7ef81d45c54ccad4f5eda506076b4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 30 Jun 2012 10:49:13 -0600 Subject: batman-adv: remove a misleading comment As much as I'm happy to see LWN links sprinkled through the kernel by the dozen, this one in particular reflects a very old state of reality; the associated comment is now incorrect. So just delete it. Signed-off-by: Jonathan Corbet Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 13c88b25ab31..2a1f24328b81 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -58,9 +58,6 @@ static int __init batadv_init(void) batadv_iv_init(); - /* the name should not be longer than 10 chars - see - * http://lwn.net/Articles/23634/ - */ batadv_event_workqueue = create_singlethread_workqueue("bat_events"); if (!batadv_event_workqueue) -- cgit v1.2.1 From d657e621a0f5584cf269b5b3d733a4cbb2497900 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 1 Jul 2012 14:09:12 +0200 Subject: batman-adv: add reference counting for type batadv_tt_orig_list_entry The batadv_tt_orig_list_entry structure didn't have any refcounting mechanism so far. This patch introduces it and makes the structure being usable in much more complex context. Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 75 ++++++++++++++++++++++++++------------ net/batman-adv/types.h | 1 + 2 files changed, 53 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 99dd8f75b3ff..eb8490e504e2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -152,6 +152,8 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { + if (!atomic_dec_and_test(&orig_entry->refcount)) + return; /* to avoid race conditions, immediately decrease the tt counter */ atomic_dec(&orig_entry->orig_node->tt_size); call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); @@ -625,50 +627,82 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->tt_changes_list_lock); } -/* find out if an orig_node is already in the list of a tt_global_entry. - * returns 1 if found, 0 otherwise +/* retrieves the orig_tt_list_entry belonging to orig_node from the + * batadv_tt_global_entry list + * + * returns it with an increased refcounter, NULL if not found */ -static bool -batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) +static struct batadv_tt_orig_list_entry * +batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, + const struct batadv_orig_node *orig_node) { - struct batadv_tt_orig_list_entry *tmp_orig_entry; + struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; struct hlist_node *node; - bool found = false; rcu_read_lock(); head = &entry->orig_list; hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { - if (tmp_orig_entry->orig_node == orig_node) { - found = true; - break; - } + if (tmp_orig_entry->orig_node != orig_node) + continue; + if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) + continue; + + orig_entry = tmp_orig_entry; + break; } rcu_read_unlock(); + + return orig_entry; +} + +/* find out if an orig_node is already in the list of a tt_global_entry. + * returns true if found, false otherwise + */ +static bool +batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, + const struct batadv_orig_node *orig_node) +{ + struct batadv_tt_orig_list_entry *orig_entry; + bool found = false; + + orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); + if (orig_entry) { + found = true; + batadv_tt_orig_list_entry_free_ref(orig_entry); + } + return found; } static void -batadv_tt_global_add_orig_entry(struct batadv_tt_global_entry *tt_global_entry, +batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, struct batadv_orig_node *orig_node, int ttvn) { struct batadv_tt_orig_list_entry *orig_entry; + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); + if (orig_entry) + goto out; + orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); if (!orig_entry) - return; + goto out; INIT_HLIST_NODE(&orig_entry->list); atomic_inc(&orig_node->refcount); atomic_inc(&orig_node->tt_size); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; + atomic_set(&orig_entry->refcount, 2); - spin_lock_bh(&tt_global_entry->list_lock); + spin_lock_bh(&tt_global->list_lock); hlist_add_head_rcu(&orig_entry->list, - &tt_global_entry->orig_list); - spin_unlock_bh(&tt_global_entry->list_lock); + &tt_global->orig_list); + spin_unlock_bh(&tt_global->list_lock); +out: + if (orig_entry) + batadv_tt_orig_list_entry_free_ref(orig_entry); } /* caller must hold orig_node refcount */ @@ -709,9 +743,6 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, batadv_tt_global_entry_free_ref(tt_global_entry); goto out_remove; } - - batadv_tt_global_add_orig_entry(tt_global_entry, orig_node, - ttvn); } else { /* there is already a global entry, use this one. */ @@ -728,11 +759,9 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, tt_global_entry->roam_at = 0; } - if (!batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) - batadv_tt_global_add_orig_entry(tt_global_entry, - orig_node, ttvn); } + /* add the new orig_entry (if needed) */ + batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7812b6edda13..664ef8148555 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -282,6 +282,7 @@ struct batadv_tt_global_entry { struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; uint8_t ttvn; + atomic_t refcount; struct rcu_head rcu; struct hlist_node list; }; -- cgit v1.2.1 From a51fb9b2ac4eac86ff13c30620758b87f4f3f5ce Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 1 Jul 2012 19:07:31 +0200 Subject: batman-adv: fix typos in comments the word millisecond is misspelled in several comments. This patch fixes it. Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 8 ++++---- net/batman-adv/vis.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 5d8fa0757947..4dfeae5e344a 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -41,13 +41,13 @@ * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ -#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in miliseconds */ -#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in miliseconds */ +#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ +#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ #define BATADV_TQ_LOCAL_WINDOW_SIZE 64 -/* miliseconds we have to keep pending tt_req */ +/* milliseconds we have to keep pending tt_req */ #define BATADV_TT_REQUEST_TIMEOUT 3000 #define BATADV_TQ_GLOBAL_WINDOW_SIZE 5 @@ -59,7 +59,7 @@ #define BATADV_TT_OGM_APPEND_MAX 3 /* Time in which a client can roam at most ROAMING_MAX_COUNT times in - * miliseconds + * milliseconds */ #define BATADV_ROAMING_MAX_TIME 20000 #define BATADV_ROAMING_MAX_COUNT 5 diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 84e716ed8963..873282fa86da 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -20,7 +20,7 @@ #ifndef _NET_BATMAN_ADV_VIS_H_ #define _NET_BATMAN_ADV_VIS_H_ -/* timeout of vis packets in miliseconds */ +/* timeout of vis packets in milliseconds */ #define BATADV_VIS_TIMEOUT 200000 int batadv_vis_seq_print_text(struct seq_file *seq, void *offset); -- cgit v1.2.1 From 624463079e0af455a2d70d2a59b9e2f6b5827aea Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sun, 1 Jul 2012 22:51:55 +0200 Subject: batman-adv: check batadv_orig_hash_add_if() return code If this call fails, some of the orig_nodes spaces may have been resized for the increased number of interface, and some may not. If we would just continue with the larger number of interfaces, this would lead to access to not allocated memory later. We better check the return code, and don't add the interface if no memory is available. OTOH, keeping some of the orig_nodes with too much memory allocated should hurt no one (except for a few too many bytes allocated). Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 282bf6e9353e..2c5a247a8f12 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -313,7 +313,13 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; hard_iface->if_status = BATADV_IF_INACTIVE; - batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); + ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); + if (ret < 0) { + bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->num_ifaces--; + hard_iface->if_status = BATADV_IF_NOT_IN_USE; + goto err_dev; + } hard_iface->batman_adv_ptype.type = ethertype; hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; -- cgit v1.2.1 From 807736f6e00714fdeb443b31061d1c27fa903296 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 15 Jul 2012 22:26:51 +0200 Subject: batman-adv: Split batadv_priv in sub-structures for features The structure batadv_priv grows everytime a new feature is introduced. It gets hard to find the parts of the struct that belongs to a specific feature. This becomes even harder by the fact that not every feature uses a prefix in the member name. The variables for bridge loop avoidence, gateway handling, translation table and visualization server are moved into separate structs that are included in the bat_priv main struct. Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 4 +- net/batman-adv/bridge_loop_avoidance.c | 115 +++++++++--------- net/batman-adv/gateway_client.c | 32 ++--- net/batman-adv/hard-interface.c | 5 +- net/batman-adv/main.c | 24 ++-- net/batman-adv/routing.c | 6 +- net/batman-adv/soft-interface.c | 17 +-- net/batman-adv/translation-table.c | 208 +++++++++++++++++---------------- net/batman-adv/types.h | 113 ++++++++++-------- net/batman-adv/vis.c | 130 +++++++++++---------- 10 files changed, 343 insertions(+), 311 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8bdd1e..eb507c901696 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -603,8 +603,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) htonl((uint32_t)atomic_read(&hard_iface->seqno)); atomic_inc(&hard_iface->seqno); - batadv_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn); - batadv_ogm_packet->tt_crc = htons(bat_priv->tt_crc); + batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); + batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); if (tt_num_changes >= 0) batadv_ogm_packet->tt_num_changes = tt_num_changes; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index c9732acd59ff..ad18017da4e7 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -133,7 +133,7 @@ static void batadv_claim_free_ref(struct batadv_claim *claim) static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, struct batadv_claim *data) { - struct batadv_hashtable *hash = bat_priv->claim_hash; + struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_claim *claim; @@ -174,7 +174,7 @@ static struct batadv_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, uint8_t *addr, short vid) { - struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_backbone_gw search_entry, *backbone_gw; @@ -218,7 +218,7 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) int i; spinlock_t *list_lock; /* protects write access to the hash lists */ - hash = backbone_gw->bat_priv->claim_hash; + hash = backbone_gw->bat_priv->bla.claim_hash; if (!hash) return; @@ -265,7 +265,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, if (!primary_if) return; - memcpy(&local_claim_dest, &bat_priv->claim_dest, + memcpy(&local_claim_dest, &bat_priv->bla.claim_dest, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; @@ -391,7 +391,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, /* one for the hash, one for returning */ atomic_set(&entry->refcount, 2); - hash_added = batadv_hash_add(bat_priv->backbone_hash, + hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, batadv_choose_backbone_gw, entry, &entry->hash_entry); @@ -458,7 +458,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, if (!backbone_gw) return; - hash = bat_priv->claim_hash; + hash = bat_priv->bla.claim_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -499,7 +499,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) /* no local broadcasts should be sent or received, for now. */ if (!atomic_read(&backbone_gw->request_sent)) { - atomic_inc(&backbone_gw->bat_priv->bla_num_requests); + atomic_inc(&backbone_gw->bat_priv->bla.num_requests); atomic_set(&backbone_gw->request_sent, 1); } } @@ -559,7 +559,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, vid); - hash_added = batadv_hash_add(bat_priv->claim_hash, + hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, &claim->hash_entry); @@ -612,7 +612,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", mac, vid); - batadv_hash_remove(bat_priv->claim_hash, batadv_compare_claim, + batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); batadv_claim_free_ref(claim); /* reference from the hash is gone */ @@ -659,7 +659,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, * we can allow traffic again. */ if (atomic_read(&backbone_gw->request_sent)) { - atomic_dec(&backbone_gw->bat_priv->bla_num_requests); + atomic_dec(&backbone_gw->bat_priv->bla.num_requests); atomic_set(&backbone_gw->request_sent, 0); } } @@ -774,7 +774,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; - bla_dst_own = &bat_priv->claim_dest; + bla_dst_own = &bat_priv->bla.claim_dest; /* check if it is a claim packet in general */ if (memcmp(bla_dst->magic, bla_dst_own->magic, @@ -947,7 +947,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) spinlock_t *list_lock; /* protects write access to the hash lists */ int i; - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) return; @@ -971,7 +971,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) purge_now: /* don't wait for the pending request anymore */ if (atomic_read(&backbone_gw->request_sent)) - atomic_dec(&bat_priv->bla_num_requests); + atomic_dec(&bat_priv->bla.num_requests); batadv_bla_del_backbone_claims(backbone_gw); @@ -1001,7 +1001,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, struct batadv_hashtable *hash; int i; - hash = bat_priv->claim_hash; + hash = bat_priv->bla.claim_hash; if (!hash) return; @@ -1048,11 +1048,12 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; + __be16 group; int i; /* reset bridge loop avoidance group id */ - bat_priv->claim_dest.group = - htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN)); + group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN)); + bat_priv->bla.claim_dest.group = group; if (!oldif) { batadv_bla_purge_claims(bat_priv, NULL, 1); @@ -1060,7 +1061,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, return; } - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) return; @@ -1090,8 +1091,8 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, /* (re)start the timer */ static void batadv_bla_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->bla_work, batadv_bla_periodic_work); - queue_delayed_work(batadv_event_workqueue, &bat_priv->bla_work, + INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); } @@ -1104,6 +1105,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); struct batadv_priv *bat_priv; + struct batadv_priv_bla *priv_bla; struct hlist_node *node; struct hlist_head *head; struct batadv_backbone_gw *backbone_gw; @@ -1111,7 +1113,8 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct batadv_hard_iface *primary_if; int i; - bat_priv = container_of(delayed_work, struct batadv_priv, bla_work); + priv_bla = container_of(delayed_work, struct batadv_priv_bla, work); + bat_priv = container_of(priv_bla, struct batadv_priv, bla); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; @@ -1122,7 +1125,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto out; - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) goto out; @@ -1162,40 +1165,41 @@ int batadv_bla_init(struct batadv_priv *bat_priv) int i; uint8_t claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00}; struct batadv_hard_iface *primary_if; + uint16_t crc; + unsigned long entrytime; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hash registering\n"); /* setting claim destination address */ - memcpy(&bat_priv->claim_dest.magic, claim_dest, 3); - bat_priv->claim_dest.type = 0; + memcpy(&bat_priv->bla.claim_dest.magic, claim_dest, 3); + bat_priv->bla.claim_dest.type = 0; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if) { - bat_priv->claim_dest.group = - htons(crc16(0, primary_if->net_dev->dev_addr, - ETH_ALEN)); + crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN); + bat_priv->bla.claim_dest.group = htons(crc); batadv_hardif_free_ref(primary_if); } else { - bat_priv->claim_dest.group = 0; /* will be set later */ + bat_priv->bla.claim_dest.group = 0; /* will be set later */ } /* initialize the duplicate list */ + entrytime = jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT); for (i = 0; i < BATADV_DUPLIST_SIZE; i++) - bat_priv->bcast_duplist[i].entrytime = - jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT); - bat_priv->bcast_duplist_curr = 0; + bat_priv->bla.bcast_duplist[i].entrytime = entrytime; + bat_priv->bla.bcast_duplist_curr = 0; - if (bat_priv->claim_hash) + if (bat_priv->bla.claim_hash) return 0; - bat_priv->claim_hash = batadv_hash_new(128); - bat_priv->backbone_hash = batadv_hash_new(32); + bat_priv->bla.claim_hash = batadv_hash_new(128); + bat_priv->bla.backbone_hash = batadv_hash_new(32); - if (!bat_priv->claim_hash || !bat_priv->backbone_hash) + if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) return -ENOMEM; - batadv_hash_set_lock_class(bat_priv->claim_hash, + batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); - batadv_hash_set_lock_class(bat_priv->backbone_hash, + batadv_hash_set_lock_class(bat_priv->bla.backbone_hash, &batadv_backbone_hash_lock_class_key); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n"); @@ -1236,8 +1240,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, crc = crc16(0, content, length); for (i = 0; i < BATADV_DUPLIST_SIZE; i++) { - curr = (bat_priv->bcast_duplist_curr + i) % BATADV_DUPLIST_SIZE; - entry = &bat_priv->bcast_duplist[curr]; + curr = (bat_priv->bla.bcast_duplist_curr + i); + curr %= BATADV_DUPLIST_SIZE; + entry = &bat_priv->bla.bcast_duplist[curr]; /* we can stop searching if the entry is too old ; * later entries will be even older @@ -1258,13 +1263,13 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, return 1; } /* not found, add a new entry (overwrite the oldest entry) */ - curr = (bat_priv->bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); + curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; - entry = &bat_priv->bcast_duplist[curr]; + entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; memcpy(entry->orig, bcast_packet->orig, ETH_ALEN); - bat_priv->bcast_duplist_curr = curr; + bat_priv->bla.bcast_duplist_curr = curr; /* allow it, its the first occurence. */ return 0; @@ -1281,7 +1286,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, */ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { - struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_backbone_gw *backbone_gw; @@ -1361,18 +1366,18 @@ void batadv_bla_free(struct batadv_priv *bat_priv) { struct batadv_hard_iface *primary_if; - cancel_delayed_work_sync(&bat_priv->bla_work); + cancel_delayed_work_sync(&bat_priv->bla.work); primary_if = batadv_primary_if_get_selected(bat_priv); - if (bat_priv->claim_hash) { + if (bat_priv->bla.claim_hash) { batadv_bla_purge_claims(bat_priv, primary_if, 1); - batadv_hash_destroy(bat_priv->claim_hash); - bat_priv->claim_hash = NULL; + batadv_hash_destroy(bat_priv->bla.claim_hash); + bat_priv->bla.claim_hash = NULL; } - if (bat_priv->backbone_hash) { + if (bat_priv->bla.backbone_hash) { batadv_bla_purge_backbone_gw(bat_priv, 1); - batadv_hash_destroy(bat_priv->backbone_hash); - bat_priv->backbone_hash = NULL; + batadv_hash_destroy(bat_priv->bla.backbone_hash); + bat_priv->bla.backbone_hash = NULL; } if (primary_if) batadv_hardif_free_ref(primary_if); @@ -1411,7 +1416,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, goto allow; - if (unlikely(atomic_read(&bat_priv->bla_num_requests))) + if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) goto handled; @@ -1510,7 +1515,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) ethhdr = (struct ethhdr *)skb_mac_header(skb); - if (unlikely(atomic_read(&bat_priv->bla_num_requests))) + if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest)) goto handled; @@ -1566,7 +1571,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->claim_hash; + struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct batadv_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_node *node; @@ -1595,7 +1600,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Claims announced for the mesh %s (orig %pM, group id %04x)\n", net_dev->name, primary_addr, - ntohs(bat_priv->claim_dest.group)); + ntohs(bat_priv->bla.claim_dest.group)); seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n", "Client", "VID", "Originator", "CRC"); for (i = 0; i < hash->size; i++) { @@ -1623,7 +1628,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct batadv_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; struct hlist_node *node; @@ -1653,7 +1658,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", net_dev->name, primary_addr, - ntohs(bat_priv->claim_dest.group)); + ntohs(bat_priv->bla.claim_dest.group)); seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", "Originator", "VID", "last seen", "CRC"); for (i = 0; i < hash->size; i++) { diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index fc866f2e4528..eef7cc739397 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -48,7 +48,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) struct batadv_gw_node *gw_node; rcu_read_lock(); - gw_node = rcu_dereference(bat_priv->curr_gw); + gw_node = rcu_dereference(bat_priv->gw.curr_gw); if (!gw_node) goto out; @@ -91,23 +91,23 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, { struct batadv_gw_node *curr_gw_node; - spin_lock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) new_gw_node = NULL; - curr_gw_node = rcu_dereference_protected(bat_priv->curr_gw, 1); - rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); + curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); + rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); if (curr_gw_node) batadv_gw_node_free_ref(curr_gw_node); - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_unlock_bh(&bat_priv->gw.list_lock); } void batadv_gw_deselect(struct batadv_priv *bat_priv) { - atomic_set(&bat_priv->gw_reselect, 1); + atomic_set(&bat_priv->gw.reselect, 1); } static struct batadv_gw_node * @@ -122,7 +122,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) struct batadv_orig_node *orig_node; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -202,7 +202,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect) && curr_gw) + if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) goto out; next_gw = batadv_gw_get_best_gw_node(bat_priv); @@ -321,9 +321,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); - spin_lock_bh(&bat_priv->gw_list_lock); - hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); + hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); + spin_unlock_bh(&bat_priv->gw.list_lock); batadv_gw_bandwidth_to_kbit(new_gwflags, &down, &up); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -350,7 +350,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -404,10 +404,10 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - spin_lock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); hlist_for_each_entry_safe(gw_node, node, node_tmp, - &bat_priv->gw_list, list) { + &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) @@ -420,7 +420,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) batadv_gw_node_free_ref(gw_node); } - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_unlock_bh(&bat_priv->gw.list_lock); /* gw_deselect() needs to acquire the gw_list_lock */ if (do_deselect) @@ -496,7 +496,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 2c5a247a8f12..d112fd6750b0 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -103,13 +103,14 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, { struct batadv_vis_packet *vis_packet; struct batadv_hard_iface *primary_if; + struct sk_buff *skb; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; - vis_packet = (struct batadv_vis_packet *) - bat_priv->my_vis_info->skb_packet->data; + skb = bat_priv->vis.my_info->skb_packet; + vis_packet = (struct batadv_vis_packet *)skb->data; memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); memcpy(vis_packet->sender_orig, primary_if->net_dev->dev_addr, ETH_ALEN); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 2a1f24328b81..b4aa470bc4a6 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -94,20 +94,20 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); - spin_lock_init(&bat_priv->tt_changes_list_lock); - spin_lock_init(&bat_priv->tt_req_list_lock); - spin_lock_init(&bat_priv->tt_roam_list_lock); - spin_lock_init(&bat_priv->tt_buff_lock); - spin_lock_init(&bat_priv->gw_list_lock); - spin_lock_init(&bat_priv->vis_hash_lock); - spin_lock_init(&bat_priv->vis_list_lock); + spin_lock_init(&bat_priv->tt.changes_list_lock); + spin_lock_init(&bat_priv->tt.req_list_lock); + spin_lock_init(&bat_priv->tt.roam_list_lock); + spin_lock_init(&bat_priv->tt.last_changeset_lock); + spin_lock_init(&bat_priv->gw.list_lock); + spin_lock_init(&bat_priv->vis.hash_lock); + spin_lock_init(&bat_priv->vis.list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); - INIT_HLIST_HEAD(&bat_priv->gw_list); - INIT_LIST_HEAD(&bat_priv->tt_changes_list); - INIT_LIST_HEAD(&bat_priv->tt_req_list); - INIT_LIST_HEAD(&bat_priv->tt_roam_list); + INIT_HLIST_HEAD(&bat_priv->gw.list); + INIT_LIST_HEAD(&bat_priv->tt.changes_list); + INIT_LIST_HEAD(&bat_priv->tt.req_list); + INIT_LIST_HEAD(&bat_priv->tt.roam_list); ret = batadv_originator_init(bat_priv); if (ret < 0) @@ -128,7 +128,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - atomic_set(&bat_priv->gw_reselect, 0); + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); return 0; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index bc2b88bbea1f..d5edee7ecfa8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -721,7 +721,7 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) * been incremented yet. This flag will make me check all the incoming * packets for the correct destination. */ - bat_priv->tt_poss_change = true; + bat_priv->tt.poss_change = true; batadv_orig_node_free_ref(orig_node); out: @@ -947,8 +947,8 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, unicast_packet = (struct batadv_unicast_packet *)skb->data; if (batadv_is_my_mac(unicast_packet->dest)) { - tt_poss_change = bat_priv->tt_poss_change; - curr_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + tt_poss_change = bat_priv->tt.poss_change; + curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); } else { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2b3842b99096..6d44625c0f5e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -420,14 +420,15 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); - atomic_set(&bat_priv->ttvn, 0); - atomic_set(&bat_priv->tt_local_changes, 0); - atomic_set(&bat_priv->tt_ogm_append_cnt, 0); - atomic_set(&bat_priv->bla_num_requests, 0); - - bat_priv->tt_buff = NULL; - bat_priv->tt_buff_len = 0; - bat_priv->tt_poss_change = false; + atomic_set(&bat_priv->tt.vn, 0); + atomic_set(&bat_priv->tt.local_changes, 0); + atomic_set(&bat_priv->tt.ogm_append_cnt, 0); +#ifdef CONFIG_BATMAN_ADV_BLA + atomic_set(&bat_priv->bla.num_requests, 0); +#endif + bat_priv->tt.last_changeset = NULL; + bat_priv->tt.last_changeset_len = 0; + bat_priv->tt.poss_change = false; bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index eb8490e504e2..b01049a7a912 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -46,8 +46,8 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) static void batadv_tt_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->tt_work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt_work, + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, msecs_to_jiffies(5000)); } @@ -88,7 +88,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt_local_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, data); if (tt_common_entry) tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -102,7 +102,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt_global_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, data); if (tt_common_entry) tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -177,8 +177,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del_op_requested = flags & BATADV_TT_CLIENT_DEL; /* check for ADD+DEL or DEL+ADD events */ - spin_lock_bh(&bat_priv->tt_changes_list_lock); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + spin_lock_bh(&bat_priv->tt.changes_list_lock); + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (!batadv_compare_eth(entry->change.addr, addr)) continue; @@ -205,15 +205,15 @@ del: } /* track the change in the OGMinterval list */ - list_add_tail(&tt_change_node->list, &bat_priv->tt_changes_list); + list_add_tail(&tt_change_node->list, &bat_priv->tt.changes_list); unlock: - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); if (event_removed) - atomic_dec(&bat_priv->tt_local_changes); + atomic_dec(&bat_priv->tt.local_changes); else - atomic_inc(&bat_priv->tt_local_changes); + atomic_inc(&bat_priv->tt.local_changes); } int batadv_tt_len(int changes_num) @@ -223,12 +223,12 @@ int batadv_tt_len(int changes_num) static int batadv_tt_local_init(struct batadv_priv *bat_priv) { - if (bat_priv->tt_local_hash) + if (bat_priv->tt.local_hash) return 0; - bat_priv->tt_local_hash = batadv_hash_new(1024); + bat_priv->tt.local_hash = batadv_hash_new(1024); - if (!bat_priv->tt_local_hash) + if (!bat_priv->tt.local_hash) return -ENOMEM; return 0; @@ -260,7 +260,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (ttvn: %d)\n", addr, - (uint8_t)atomic_read(&bat_priv->ttvn)); + (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local_entry->common.addr, addr, ETH_ALEN); tt_local_entry->common.flags = BATADV_NO_FLAGS; @@ -279,7 +279,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, */ tt_local_entry->common.flags |= BATADV_TT_CLIENT_NEW; - hash_added = batadv_hash_add(bat_priv->tt_local_hash, batadv_compare_tt, + hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_orig, &tt_local_entry->common, &tt_local_entry->common.hash_entry); @@ -350,7 +350,7 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, primary_if = batadv_primary_if_get_selected(bat_priv); req_len = min_packet_len; - req_len += batadv_tt_len(atomic_read(&bat_priv->tt_local_changes)); + req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented @@ -383,10 +383,10 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, if (new_len > 0) tot_changes = new_len / batadv_tt_len(1); - spin_lock_bh(&bat_priv->tt_changes_list_lock); - atomic_set(&bat_priv->tt_local_changes, 0); + spin_lock_bh(&bat_priv->tt.changes_list_lock); + atomic_set(&bat_priv->tt.local_changes, 0); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (count < tot_changes) { memcpy(tt_buff + batadv_tt_len(count), @@ -396,25 +396,25 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, list_del(&entry->list); kfree(entry); } - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); /* Keep the buffer for possible tt_request */ - spin_lock_bh(&bat_priv->tt_buff_lock); - kfree(bat_priv->tt_buff); - bat_priv->tt_buff_len = 0; - bat_priv->tt_buff = NULL; + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + kfree(bat_priv->tt.last_changeset); + bat_priv->tt.last_changeset_len = 0; + bat_priv->tt.last_changeset = NULL; /* check whether this new OGM has no changes due to size problems */ if (new_len > 0) { /* if kmalloc() fails we will reply with the full table * instead of providing the diff */ - bat_priv->tt_buff = kmalloc(new_len, GFP_ATOMIC); - if (bat_priv->tt_buff) { - memcpy(bat_priv->tt_buff, tt_buff, new_len); - bat_priv->tt_buff_len = new_len; + bat_priv->tt.last_changeset = kmalloc(new_len, GFP_ATOMIC); + if (bat_priv->tt.last_changeset) { + memcpy(bat_priv->tt.last_changeset, tt_buff, new_len); + bat_priv->tt.last_changeset_len = new_len; } } - spin_unlock_bh(&bat_priv->tt_buff_lock); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); return count; } @@ -423,7 +423,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; struct batadv_hard_iface *primary_if; struct hlist_node *node; @@ -448,7 +448,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn)); + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -546,7 +546,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, static void batadv_tt_local_purge(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -572,10 +572,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct hlist_head *head; uint32_t i; - if (!bat_priv->tt_local_hash) + if (!bat_priv->tt.local_hash) return; - hash = bat_priv->tt_local_hash; + hash = bat_priv->tt.local_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -595,17 +595,17 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) batadv_hash_destroy(hash); - bat_priv->tt_local_hash = NULL; + bat_priv->tt.local_hash = NULL; } static int batadv_tt_global_init(struct batadv_priv *bat_priv) { - if (bat_priv->tt_global_hash) + if (bat_priv->tt.global_hash) return 0; - bat_priv->tt_global_hash = batadv_hash_new(1024); + bat_priv->tt.global_hash = batadv_hash_new(1024); - if (!bat_priv->tt_global_hash) + if (!bat_priv->tt.global_hash) return -ENOMEM; return 0; @@ -615,16 +615,16 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_change_node *entry, *safe; - spin_lock_bh(&bat_priv->tt_changes_list_lock); + spin_lock_bh(&bat_priv->tt.changes_list_lock); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); kfree(entry); } - atomic_set(&bat_priv->tt_local_changes, 0); - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + atomic_set(&bat_priv->tt.local_changes, 0); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); } /* retrieves the orig_tt_list_entry belonging to orig_node from the @@ -733,7 +733,7 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, INIT_HLIST_HEAD(&tt_global_entry->orig_list); spin_lock_init(&tt_global_entry->list_lock); - hash_added = batadv_hash_add(bat_priv->tt_global_hash, + hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, common, &common->hash_entry); @@ -812,7 +812,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; @@ -913,7 +913,7 @@ batadv_tt_global_del_struct(struct batadv_priv *bat_priv, "Deleting global tt entry %pM: %s\n", tt_global_entry->common.addr, message); - batadv_hash_remove(bat_priv->tt_global_hash, batadv_compare_tt, + batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, tt_global_entry->common.addr); batadv_tt_global_entry_free_ref(tt_global_entry); @@ -1024,7 +1024,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global; struct batadv_tt_common_entry *tt_common_entry; uint32_t i; - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_node *node, *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1088,7 +1088,7 @@ static void batadv_tt_global_roam_purge_list(struct batadv_priv *bat_priv, static void batadv_tt_global_roam_purge(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -1114,10 +1114,10 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) struct hlist_head *head; uint32_t i; - if (!bat_priv->tt_global_hash) + if (!bat_priv->tt.global_hash) return; - hash = bat_priv->tt_global_hash; + hash = bat_priv->tt.global_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1137,7 +1137,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) batadv_hash_destroy(hash); - bat_priv->tt_global_hash = NULL; + bat_priv->tt.global_hash = NULL; } static bool @@ -1216,7 +1216,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { uint16_t total = 0, total_one; - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_node *node; @@ -1263,7 +1263,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) { uint16_t total = 0, total_one; - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_node *node; struct hlist_head *head; @@ -1296,14 +1296,14 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node, *safe; - spin_lock_bh(&bat_priv->tt_req_list_lock); + spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); } static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, @@ -1333,15 +1333,15 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node, *safe; - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { list_del(&node->list); kfree(node); } } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); } /* returns the pointer to the new tt_req_node struct if no request @@ -1353,8 +1353,8 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, { struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry(tt_req_node_tmp, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) { if (batadv_compare_eth(tt_req_node_tmp, orig_node) && !batadv_has_timed_out(tt_req_node_tmp->issued_at, BATADV_TT_REQUEST_TIMEOUT)) @@ -1368,9 +1368,9 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN); tt_req_node->issued_at = jiffies; - list_add(&tt_req_node->list, &bat_priv->tt_req_list); + list_add(&tt_req_node->list, &bat_priv->tt.req_list); unlock: - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); return tt_req_node; } @@ -1536,9 +1536,9 @@ out: if (ret) kfree_skb(skb); if (ret && tt_req_node) { - spin_lock_bh(&bat_priv->tt_req_list_lock); + spin_lock_bh(&bat_priv->tt.req_list_lock); list_del(&tt_req_node->list); - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); kfree(tt_req_node); } return ret; @@ -1629,7 +1629,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt_global_hash, + bat_priv->tt.global_hash, primary_if, batadv_tt_global_valid, req_dst_orig_node); @@ -1700,7 +1700,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); - my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); req_ttvn = tt_request->ttvn; orig_node = batadv_orig_hash_find(bat_priv, tt_request->src); @@ -1719,7 +1719,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, * is too big send the whole local translation table */ if (tt_request->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn || - !bat_priv->tt_buff) + !bat_priv->tt.last_changeset) full_table = true; else full_table = false; @@ -1728,8 +1728,8 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, * I'll send only one packet with as much TT entries as I can */ if (!full_table) { - spin_lock_bh(&bat_priv->tt_buff_lock); - tt_len = bat_priv->tt_buff_len; + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + tt_len = bat_priv->tt.last_changeset_len; tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; @@ -1744,16 +1744,16 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_response->tt_data = htons(tt_tot); tt_buff = skb->data + sizeof(*tt_response); - memcpy(tt_buff, bat_priv->tt_buff, - bat_priv->tt_buff_len); - spin_unlock_bh(&bat_priv->tt_buff_lock); + memcpy(tt_buff, bat_priv->tt.last_changeset, + bat_priv->tt.last_changeset_len); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { - tt_len = (uint16_t)atomic_read(&bat_priv->num_local_tt); + tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num); tt_len *= sizeof(struct batadv_tt_change); - ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt_local_hash, + bat_priv->tt.local_hash, primary_if, batadv_tt_local_valid_entry, NULL); @@ -1785,7 +1785,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, goto out; unlock: - spin_unlock_bh(&bat_priv->tt_buff_lock); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); out: if (orig_node) batadv_orig_node_free_ref(orig_node); @@ -1938,14 +1938,14 @@ void batadv_handle_tt_response(struct batadv_priv *bat_priv, } /* Delete the tt_req_node from pending tt_requests list */ - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (!batadv_compare_eth(node->addr, tt_response->src)) continue; list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); /* Recalculate the CRC for this orig_node and store it */ orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node); @@ -1979,22 +1979,22 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; - spin_lock_bh(&bat_priv->tt_roam_list_lock); + spin_lock_bh(&bat_priv->tt.roam_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); } static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; - spin_lock_bh(&bat_priv->tt_roam_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + spin_lock_bh(&bat_priv->tt.roam_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { if (!batadv_has_timed_out(node->first_time, BATADV_ROAMING_MAX_TIME)) continue; @@ -2002,7 +2002,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); } /* This function checks whether the client already reached the @@ -2017,11 +2017,11 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, struct batadv_tt_roam_node *tt_roam_node; bool ret = false; - spin_lock_bh(&bat_priv->tt_roam_list_lock); + spin_lock_bh(&bat_priv->tt.roam_list_lock); /* The new tt_req will be issued only if I'm not waiting for a * reply from the same orig_node yet */ - list_for_each_entry(tt_roam_node, &bat_priv->tt_roam_list, list) { + list_for_each_entry(tt_roam_node, &bat_priv->tt.roam_list, list) { if (!batadv_compare_eth(tt_roam_node->addr, client)) continue; @@ -2046,12 +2046,12 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, BATADV_ROAMING_MAX_COUNT - 1); memcpy(tt_roam_node->addr, client, ETH_ALEN); - list_add(&tt_roam_node->list, &bat_priv->tt_roam_list); + list_add(&tt_roam_node->list, &bat_priv->tt.roam_list); ret = true; } unlock: - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); return ret; } @@ -2115,10 +2115,12 @@ out: static void batadv_tt_purge(struct work_struct *work) { struct delayed_work *delayed_work; + struct batadv_priv_tt *priv_tt; struct batadv_priv *bat_priv; delayed_work = container_of(work, struct delayed_work, work); - bat_priv = container_of(delayed_work, struct batadv_priv, tt_work); + priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); + bat_priv = container_of(priv_tt, struct batadv_priv, tt); batadv_tt_local_purge(bat_priv); batadv_tt_global_roam_purge(bat_priv); @@ -2130,7 +2132,7 @@ static void batadv_tt_purge(struct work_struct *work) void batadv_tt_free(struct batadv_priv *bat_priv) { - cancel_delayed_work_sync(&bat_priv->tt_work); + cancel_delayed_work_sync(&bat_priv->tt.work); batadv_tt_local_table_free(bat_priv); batadv_tt_global_table_free(bat_priv); @@ -2138,7 +2140,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv) batadv_tt_changes_list_free(bat_priv); batadv_tt_roam_list_free(bat_priv); - kfree(bat_priv->tt_buff); + kfree(bat_priv->tt.last_changeset); } /* This function will enable or disable the specified flags for all the entries @@ -2182,7 +2184,7 @@ out: /* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; struct hlist_node *node, *node_tmp; @@ -2207,7 +2209,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) "Deleting local tt entry (%pM): pending\n", tt_common->addr); - atomic_dec(&bat_priv->num_local_tt); + atomic_dec(&bat_priv->tt.local_entry_num); hlist_del_rcu(node); tt_local = container_of(tt_common, struct batadv_tt_local_entry, @@ -2225,26 +2227,26 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, { uint16_t changed_num = 0; - if (atomic_read(&bat_priv->tt_local_changes) < 1) + if (atomic_read(&bat_priv->tt.local_changes) < 1) return -ENOENT; - changed_num = batadv_tt_set_flags(bat_priv->tt_local_hash, + changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash, BATADV_TT_CLIENT_NEW, false); /* all reset entries have to be counted as local entries */ - atomic_add(changed_num, &bat_priv->num_local_tt); + atomic_add(changed_num, &bat_priv->tt.local_entry_num); batadv_tt_local_purge_pending_clients(bat_priv); - bat_priv->tt_crc = batadv_tt_local_crc(bat_priv); + bat_priv->tt.local_crc = batadv_tt_local_crc(bat_priv); /* Increment the TTVN only once per OGM interval */ - atomic_inc(&bat_priv->ttvn); + atomic_inc(&bat_priv->tt.vn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Local changes committed, updating to ttvn %u\n", - (uint8_t)atomic_read(&bat_priv->ttvn)); - bat_priv->tt_poss_change = false; + (uint8_t)atomic_read(&bat_priv->tt.vn)); + bat_priv->tt.poss_change = false; /* reset the sending counter */ - atomic_set(&bat_priv->tt_ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); + atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); return batadv_tt_changes_fill_buff(bat_priv, packet_buff, packet_buff_len, packet_min_len); @@ -2264,7 +2266,7 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv, /* if the changes have been sent often enough */ if ((tt_num_changes < 0) && - (!batadv_atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))) { + (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))) { batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, packet_min_len); tt_num_changes = 0; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 664ef8148555..97c4978dee69 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -165,6 +165,67 @@ enum batadv_counters { BATADV_CNT_NUM, }; +/** + * struct batadv_priv_tt - per mesh interface translation table data + * @vn: translation table version number + * @local_changes: changes registered in an originator interval + * @poss_change: Detect an ongoing roaming phase. If true, then this node + * received a roaming_adv and has to inspect every packet directed to it to + * check whether it still is the true destination or not. This flag will be + * reset to false as soon as the this node's ttvn is increased + * @changes_list: tracks tt local changes within an originator interval + * @req_list: list of pending tt_requests + * @local_crc: Checksum of the local table, recomputed before sending a new OGM + */ +struct batadv_priv_tt { + atomic_t vn; + atomic_t ogm_append_cnt; + atomic_t local_changes; + bool poss_change; + struct list_head changes_list; + struct batadv_hashtable *local_hash; + struct batadv_hashtable *global_hash; + struct list_head req_list; + struct list_head roam_list; + spinlock_t changes_list_lock; /* protects changes */ + spinlock_t req_list_lock; /* protects req_list */ + spinlock_t roam_list_lock; /* protects roam_list */ + atomic_t local_entry_num; + uint16_t local_crc; + unsigned char *last_changeset; + int16_t last_changeset_len; + spinlock_t last_changeset_lock; /* protects last_changeset */ + struct delayed_work work; +}; + +#ifdef CONFIG_BATMAN_ADV_BLA +struct batadv_priv_bla { + atomic_t num_requests; /* number of bla requests in flight */ + struct batadv_hashtable *claim_hash; + struct batadv_hashtable *backbone_hash; + struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; + int bcast_duplist_curr; + struct batadv_bla_claim_dst claim_dest; + struct delayed_work work; +}; +#endif + +struct batadv_priv_gw { + struct hlist_head list; + spinlock_t list_lock; /* protects gw_list and curr_gw */ + struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t reselect; +}; + +struct batadv_priv_vis { + struct list_head send_list; + struct batadv_hashtable *hash; + spinlock_t hash_lock; /* protects hash */ + spinlock_t list_lock; /* protects info::recv_list */ + struct delayed_work work; + struct batadv_vis_info *my_info; +}; + struct batadv_priv { atomic_t mesh_state; struct net_device_stats stats; @@ -184,64 +245,24 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - atomic_t ttvn; /* translation table version number */ - atomic_t tt_ogm_append_cnt; - atomic_t tt_local_changes; /* changes registered in a OGM interval */ - atomic_t bla_num_requests; /* number of bla requests in flight */ - /* The tt_poss_change flag is used to detect an ongoing roaming phase. - * If true, then I received a Roaming_adv and I have to inspect every - * packet directed to me to check whether I am still the true - * destination or not. This flag will be reset to false as soon as I - * increase my TTVN - */ - bool tt_poss_change; char num_ifaces; struct batadv_debug_log *debug_log; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; struct hlist_head forw_bcast_list; - struct hlist_head gw_list; - struct list_head tt_changes_list; /* tracks changes in a OGM int */ - struct list_head vis_send_list; struct batadv_hashtable *orig_hash; - struct batadv_hashtable *tt_local_hash; - struct batadv_hashtable *tt_global_hash; -#ifdef CONFIG_BATMAN_ADV_BLA - struct batadv_hashtable *claim_hash; - struct batadv_hashtable *backbone_hash; -#endif - struct list_head tt_req_list; /* list of pending tt_requests */ - struct list_head tt_roam_list; - struct batadv_hashtable *vis_hash; -#ifdef CONFIG_BATMAN_ADV_BLA - struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; - int bcast_duplist_curr; - struct batadv_bla_claim_dst claim_dest; -#endif spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ - spinlock_t tt_changes_list_lock; /* protects tt_changes */ - spinlock_t tt_req_list_lock; /* protects tt_req_list */ - spinlock_t tt_roam_list_lock; /* protects tt_roam_list */ - spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ - spinlock_t vis_hash_lock; /* protects vis_hash */ - spinlock_t vis_list_lock; /* protects vis_info::recv_list */ - atomic_t num_local_tt; - /* Checksum of the local table, recomputed before sending a new OGM */ - uint16_t tt_crc; - unsigned char *tt_buff; - int16_t tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff */ - struct delayed_work tt_work; struct delayed_work orig_work; - struct delayed_work vis_work; - struct delayed_work bla_work; - struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ - atomic_t gw_reselect; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ - struct batadv_vis_info *my_vis_info; struct batadv_algo_ops *bat_algo_ops; +#ifdef CONFIG_BATMAN_ADV_BLA + struct batadv_priv_bla bla; +#endif + struct batadv_priv_gw gw; + struct batadv_priv_tt tt; + struct batadv_priv_vis vis; }; struct batadv_socket_client { diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 2a2ea0681469..4608c1b22d44 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -41,13 +41,13 @@ static void batadv_free_info(struct kref *ref) bat_priv = info->bat_priv; list_del_init(&info->send_list); - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry_safe(entry, tmp, &info->recv_list, list) { list_del(&entry->list); kfree(entry); } - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); kfree_skb(info->skb_packet); kfree(info); } @@ -94,7 +94,7 @@ static uint32_t batadv_vis_info_choose(const void *data, uint32_t size) static struct batadv_vis_info * batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; @@ -252,7 +252,7 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset) struct hlist_head *head; struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; uint32_t i; int ret = 0; int vis_server = atomic_read(&bat_priv->vis_mode); @@ -264,12 +264,12 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset) if (vis_server == BATADV_VIS_TYPE_CLIENT_UPDATE) goto out; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; batadv_vis_seq_print_text_bucket(seq, head); } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); out: if (primary_if) @@ -285,7 +285,7 @@ static void batadv_send_list_add(struct batadv_priv *bat_priv, { if (list_empty(&info->send_list)) { kref_get(&info->refcount); - list_add_tail(&info->send_list, &bat_priv->vis_send_list); + list_add_tail(&info->send_list, &bat_priv->vis.send_list); } } @@ -311,9 +311,9 @@ static void batadv_recv_list_add(struct batadv_priv *bat_priv, return; memcpy(entry->mac, mac, ETH_ALEN); - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_add_tail(&entry->list, recv_list); - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); } /* returns 1 if this mac is in the recv_list */ @@ -323,14 +323,14 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, { const struct batadv_recvlist_node *entry; - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry(entry, recv_list, list) { if (batadv_compare_eth(entry->mac, mac)) { - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); return 1; } } - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); return 0; } @@ -354,7 +354,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, *is_new = 0; /* sanity check */ - if (!bat_priv->vis_hash) + if (!bat_priv->vis.hash) return NULL; /* see if the packet is already in vis_hash */ @@ -385,7 +385,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, } } /* remove old entry */ - batadv_hash_remove(bat_priv->vis_hash, batadv_vis_info_cmp, + batadv_hash_remove(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, old_info); batadv_send_list_del(old_info); kref_put(&old_info->refcount, batadv_free_info); @@ -426,7 +426,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, batadv_recv_list_add(bat_priv, &info->recv_list, packet->sender_orig); /* try to add it */ - hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp, + hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, info, &info->hash_entry); if (hash_added != 0) { @@ -449,7 +449,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, make_broadcast = (vis_server == BATADV_VIS_TYPE_SERVER_SYNC); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, &is_new, make_broadcast); if (!info) @@ -461,7 +461,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && is_new) batadv_send_list_add(bat_priv, info); end: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* handle an incoming client update packet and schedule forward if needed. */ @@ -484,7 +484,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_is_my_mac(vis_packet->target_orig)) are_target = 1; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, &is_new, are_target); @@ -505,7 +505,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, } end: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* Walk the originators and find the VIS server with the best tq. Set the packet @@ -574,7 +574,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; - struct batadv_vis_info *info = bat_priv->my_vis_info; + struct batadv_vis_info *info = bat_priv->vis.my_info; struct batadv_vis_packet *packet; struct batadv_vis_info_entry *entry; struct batadv_tt_common_entry *tt_common_entry; @@ -636,7 +636,7 @@ next: rcu_read_unlock(); } - hash = bat_priv->tt_local_hash; + hash = bat_priv->tt.local_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -671,7 +671,7 @@ unlock: static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; @@ -682,7 +682,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(info, node, node_tmp, head, hash_entry) { /* never purge own data. */ - if (info == bat_priv->my_vis_info) + if (info == bat_priv->vis.my_info) continue; if (batadv_has_timed_out(info->first_seen, @@ -817,31 +817,33 @@ static void batadv_send_vis_packets(struct work_struct *work) struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); struct batadv_priv *bat_priv; + struct batadv_priv_vis *priv_vis; struct batadv_vis_info *info; - bat_priv = container_of(delayed_work, struct batadv_priv, vis_work); - spin_lock_bh(&bat_priv->vis_hash_lock); + priv_vis = container_of(delayed_work, struct batadv_priv_vis, work); + bat_priv = container_of(priv_vis, struct batadv_priv, vis); + spin_lock_bh(&bat_priv->vis.hash_lock); batadv_purge_vis_packets(bat_priv); if (batadv_generate_vis_packet(bat_priv) == 0) { /* schedule if generation was successful */ - batadv_send_list_add(bat_priv, bat_priv->my_vis_info); + batadv_send_list_add(bat_priv, bat_priv->vis.my_info); } - while (!list_empty(&bat_priv->vis_send_list)) { - info = list_first_entry(&bat_priv->vis_send_list, + while (!list_empty(&bat_priv->vis.send_list)) { + info = list_first_entry(&bat_priv->vis.send_list, typeof(*info), send_list); kref_get(&info->refcount); - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_send_vis_packet(bat_priv, info); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_start_vis_timer(bat_priv); } @@ -856,37 +858,37 @@ int batadv_vis_init(struct batadv_priv *bat_priv) unsigned long first_seen; struct sk_buff *tmp_skb; - if (bat_priv->vis_hash) + if (bat_priv->vis.hash) return 0; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); - bat_priv->vis_hash = batadv_hash_new(256); - if (!bat_priv->vis_hash) { + bat_priv->vis.hash = batadv_hash_new(256); + if (!bat_priv->vis.hash) { pr_err("Can't initialize vis_hash\n"); goto err; } - bat_priv->my_vis_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); - if (!bat_priv->my_vis_info) + bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); + if (!bat_priv->vis.my_info) goto err; len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; - bat_priv->my_vis_info->skb_packet = dev_alloc_skb(len); - if (!bat_priv->my_vis_info->skb_packet) + bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->my_vis_info->skb_packet, ETH_HLEN); - tmp_skb = bat_priv->my_vis_info->skb_packet; + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); + tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); /* prefill the vis info */ first_seen = jiffies - msecs_to_jiffies(BATADV_VIS_INTERVAL); - bat_priv->my_vis_info->first_seen = first_seen; - INIT_LIST_HEAD(&bat_priv->my_vis_info->recv_list); - INIT_LIST_HEAD(&bat_priv->my_vis_info->send_list); - kref_init(&bat_priv->my_vis_info->refcount); - bat_priv->my_vis_info->bat_priv = bat_priv; + bat_priv->vis.my_info->first_seen = first_seen; + INIT_LIST_HEAD(&bat_priv->vis.my_info->recv_list); + INIT_LIST_HEAD(&bat_priv->vis.my_info->send_list); + kref_init(&bat_priv->vis.my_info->refcount); + bat_priv->vis.my_info->bat_priv = bat_priv; packet->header.version = BATADV_COMPAT_VERSION; packet->header.packet_type = BATADV_VIS; packet->header.ttl = BATADV_TTL; @@ -894,28 +896,28 @@ int batadv_vis_init(struct batadv_priv *bat_priv) packet->reserved = 0; packet->entries = 0; - INIT_LIST_HEAD(&bat_priv->vis_send_list); + INIT_LIST_HEAD(&bat_priv->vis.send_list); - hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp, + hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, - bat_priv->my_vis_info, - &bat_priv->my_vis_info->hash_entry); + bat_priv->vis.my_info, + &bat_priv->vis.my_info->hash_entry); if (hash_added != 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ - kref_put(&bat_priv->my_vis_info->refcount, batadv_free_info); + kref_put(&bat_priv->vis.my_info->refcount, batadv_free_info); goto err; } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_start_vis_timer(bat_priv); return 0; free_info: - kfree(bat_priv->my_vis_info); - bat_priv->my_vis_info = NULL; + kfree(bat_priv->vis.my_info); + bat_priv->vis.my_info = NULL; err: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_vis_quit(bat_priv); return -ENOMEM; } @@ -933,23 +935,23 @@ static void batadv_free_info_ref(struct hlist_node *node, void *arg) /* shutdown vis-server */ void batadv_vis_quit(struct batadv_priv *bat_priv) { - if (!bat_priv->vis_hash) + if (!bat_priv->vis.hash) return; - cancel_delayed_work_sync(&bat_priv->vis_work); + cancel_delayed_work_sync(&bat_priv->vis.work); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); /* properly remove, kill timers ... */ - batadv_hash_delete(bat_priv->vis_hash, batadv_free_info_ref, NULL); - bat_priv->vis_hash = NULL; - bat_priv->my_vis_info = NULL; - spin_unlock_bh(&bat_priv->vis_hash_lock); + batadv_hash_delete(bat_priv->vis.hash, batadv_free_info_ref, NULL); + bat_priv->vis.hash = NULL; + bat_priv->vis.my_info = NULL; + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* schedule packets for (re)transmission */ static void batadv_start_vis_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->vis_work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis_work, + INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, msecs_to_jiffies(BATADV_VIS_INTERVAL)); } -- cgit v1.2.1 From ff51fd70ad3e6bbf1ffc7d2a1d6a845d081df5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Thu, 5 Jul 2012 11:34:27 +0200 Subject: batman-adv: Move batadv_check_unicast_packet() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batadv_check_unicast_packet() is needed in batadv_recv_tt_query(), so move the former to before the latter. Signed-off-by: Martin Hundebøll Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 50 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d5edee7ecfa8..a79ded53dd94 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -579,6 +579,31 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } +static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +{ + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return -1; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + return -1; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + return -1; + + /* not for me */ + if (!batadv_is_my_mac(ethhdr->h_dest)) + return -1; + + return 0; +} + int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); @@ -819,31 +844,6 @@ err: return NULL; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) -{ - struct ethhdr *ethhdr; - - /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, hdr_size))) - return -1; - - ethhdr = (struct ethhdr *)skb_mac_header(skb); - - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - return -1; - - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; - - /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) - return -1; - - return 0; -} - static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { -- cgit v1.2.1 From 74ee3634dcd1f11624192f72f458bcb5a04e4822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Thu, 5 Jul 2012 11:34:28 +0200 Subject: batman-adv: Drop tt queries with foreign dest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabling promiscuous mode, tt queries for other hosts might be received. Before this patch, "foreign" tt queries were processed like any other query and thus forwarded to its destination again and thereby causing a loop. This patch adds a check to drop foreign tt queries. Signed-off-by: Martin Hundebøll Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index a79ded53dd94..4961278086bd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -609,29 +609,17 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_tt_query_packet *tt_query; uint16_t tt_size; - struct ethhdr *ethhdr; + int hdr_size = sizeof(*tt_query); char tt_flag; size_t packet_size; - /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, - sizeof(struct batadv_tt_query_packet)))) - goto out; + if (batadv_check_unicast_packet(skb, hdr_size) < 0) + return NET_RX_DROP; /* I could need to modify it */ if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); - - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; - - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - goto out; - tt_query = (struct batadv_tt_query_packet *)skb->data; switch (tt_query->flags & BATADV_TT_QUERY_TYPE_MASK) { -- cgit v1.2.1 From 8de47de5757bd5f64a823185e544210d95fd2088 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 8 Jul 2012 16:32:09 +0200 Subject: batman-adv: Use BIT(x) macro to calculate bit positions Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 12 +++++++----- net/batman-adv/main.h | 18 +++++++++--------- net/batman-adv/packet.h | 30 +++++++++++++++--------------- 3 files changed, 31 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index eb507c901696..daaccc1326d4 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -181,8 +181,8 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ - if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == hard_iface)) + if (forw_packet->direct_link_flags & BIT(packet_num) && + forw_packet->if_incoming == hard_iface) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; @@ -454,6 +454,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, int packet_len, bool direct_link) { unsigned char *skb_buff; + unsigned long new_direct_link_flag; skb_buff = skb_put(forw_packet_aggr->skb, packet_len); memcpy(skb_buff, packet_buff, packet_len); @@ -461,9 +462,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, forw_packet_aggr->num_packets++; /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= - (1 << forw_packet_aggr->num_packets); + if (direct_link) { + new_direct_link_flag = BIT(forw_packet_aggr->num_packets); + forw_packet_aggr->direct_link_flags |= new_direct_link_flag; + } } static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 4dfeae5e344a..574fca1bd434 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -123,15 +123,6 @@ enum batadv_uev_type { /* Append 'batman-adv: ' before kernel messages */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -/* all messages related to routing / flooding / broadcasting / etc */ -enum batadv_dbg_level { - BATADV_DBG_BATMAN = 1 << 0, - BATADV_DBG_ROUTES = 1 << 1, /* route added / changed / deleted */ - BATADV_DBG_TT = 1 << 2, /* translation table operations */ - BATADV_DBG_BLA = 1 << 3, /* bridge loop avoidance */ - BATADV_DBG_ALL = 15, -}; - /* Kernel headers */ #include /* mutex */ @@ -173,6 +164,15 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +/* all messages related to routing / flooding / broadcasting / etc */ +enum batadv_dbg_level { + BATADV_DBG_BATMAN = BIT(0), + BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */ + BATADV_DBG_TT = BIT(2), /* translation table operations */ + BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */ + BATADV_DBG_ALL = 15, +}; + #ifdef CONFIG_BATMAN_ADV_DEBUG int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 65d66e4ee757..eb4593413b73 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -37,10 +37,10 @@ enum batadv_packettype { #define BATADV_COMPAT_VERSION 14 enum batadv_iv_flags { - BATADV_NOT_BEST_NEXT_HOP = 1 << 3, - BATADV_PRIMARIES_FIRST_HOP = 1 << 4, - BATADV_VIS_SERVER = 1 << 5, - BATADV_DIRECTLINK = 1 << 6, + BATADV_NOT_BEST_NEXT_HOP = BIT(3), + BATADV_PRIMARIES_FIRST_HOP = BIT(4), + BATADV_VIS_SERVER = BIT(5), + BATADV_DIRECTLINK = BIT(6), }; /* ICMP message types */ @@ -60,8 +60,8 @@ enum batadv_vis_packettype { /* fragmentation defines */ enum batadv_unicast_frag_flags { - BATADV_UNI_FRAG_HEAD = 1 << 0, - BATADV_UNI_FRAG_LARGETAIL = 1 << 1, + BATADV_UNI_FRAG_HEAD = BIT(0), + BATADV_UNI_FRAG_LARGETAIL = BIT(1), }; /* TT_QUERY subtypes */ @@ -74,20 +74,20 @@ enum batadv_tt_query_packettype { /* TT_QUERY flags */ enum batadv_tt_query_flags { - BATADV_TT_FULL_TABLE = 1 << 2, + BATADV_TT_FULL_TABLE = BIT(2), }; /* BATADV_TT_CLIENT flags. - * Flags from 1 to 1 << 7 are sent on the wire, while flags from 1 << 8 to - * 1 << 15 are used for local computation only + * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to + * BIT(15) are used for local computation only */ enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = 1 << 0, - BATADV_TT_CLIENT_ROAM = 1 << 1, - BATADV_TT_CLIENT_WIFI = 1 << 2, - BATADV_TT_CLIENT_NOPURGE = 1 << 8, - BATADV_TT_CLIENT_NEW = 1 << 9, - BATADV_TT_CLIENT_PENDING = 1 << 10, + BATADV_TT_CLIENT_DEL = BIT(0), + BATADV_TT_CLIENT_ROAM = BIT(1), + BATADV_TT_CLIENT_WIFI = BIT(2), + BATADV_TT_CLIENT_NOPURGE = BIT(8), + BATADV_TT_CLIENT_NEW = BIT(9), + BATADV_TT_CLIENT_PENDING = BIT(10), }; /* claim frame types for the bridge loop avoidance */ -- cgit v1.2.1 From bbb1f90efba89b31fc5e329d5fcaf10aca99212b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 8 Jul 2012 17:13:15 +0200 Subject: batman-adv: Don't break statements after assignment operator Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 39 ++++++++++++++++++---------------- net/batman-adv/bridge_loop_avoidance.c | 7 +++--- net/batman-adv/send.c | 8 +++---- net/batman-adv/soft-interface.c | 5 +++-- net/batman-adv/unicast.c | 11 +++++----- net/batman-adv/vis.c | 4 ++-- 6 files changed, 39 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index daaccc1326d4..616d52d9cd0a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -588,6 +588,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if; int vis_server, tt_num_changes = 0; + uint32_t seqno; + uint8_t bandwidth; vis_server = atomic_read(&bat_priv->vis_mode); primary_if = batadv_primary_if_get_selected(bat_priv); @@ -601,8 +603,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; /* change sequence number to network order */ - batadv_ogm_packet->seqno = - htonl((uint32_t)atomic_read(&hard_iface->seqno)); + seqno = (uint32_t)atomic_read(&hard_iface->seqno); + batadv_ogm_packet->seqno = htonl(seqno); atomic_inc(&hard_iface->seqno); batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); @@ -615,12 +617,13 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) else batadv_ogm_packet->flags &= ~BATADV_VIS_SERVER; - if ((hard_iface == primary_if) && - (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER)) - batadv_ogm_packet->gw_flags = - (uint8_t)atomic_read(&bat_priv->gw_bandwidth); - else + if (hard_iface == primary_if && + atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER) { + bandwidth = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); + batadv_ogm_packet->gw_flags = bandwidth; + } else { batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; + } batadv_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, @@ -644,8 +647,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; + uint8_t tq_avg; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); @@ -669,8 +673,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_lock_bh(&tmp_neigh_node->lq_update_lock); batadv_ring_buffer_set(tmp_neigh_node->tq_recv, &tmp_neigh_node->tq_index, 0); - tmp_neigh_node->tq_avg = - batadv_ring_buffer_avg(tmp_neigh_node->tq_recv); + tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->tq_recv); + tmp_neigh_node->tq_avg = tq_avg; spin_unlock_bh(&tmp_neigh_node->lq_update_lock); } @@ -729,17 +733,15 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + sum_orig = orig_node_tmp->bcast_own_sum[if_incoming->if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + sum_neigh = orig_node_tmp->bcast_own_sum[if_incoming->if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } @@ -912,6 +914,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, int set_mark, ret = -1; uint32_t seqno = ntohl(batadv_ogm_packet->seqno); uint8_t *neigh_addr; + uint8_t packet_count; orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig); if (!orig_node) @@ -946,9 +949,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, tmp_neigh_node->real_bits, seq_diff, set_mark); - tmp_neigh_node->real_packet_count = - bitmap_weight(tmp_neigh_node->real_bits, - BATADV_TQ_LOCAL_WINDOW_SIZE); + packet_count = bitmap_weight(tmp_neigh_node->real_bits, + BATADV_TQ_LOCAL_WINDOW_SIZE); + tmp_neigh_node->real_packet_count = packet_count; } rcu_read_unlock(); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ad18017da4e7..7a93b2ae3a9b 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -579,8 +579,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", mac, vid); - claim->backbone_gw->crc ^= - crc16(0, claim->addr, ETH_ALEN); + claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); } @@ -1102,8 +1101,7 @@ static void batadv_bla_start_timer(struct batadv_priv *bat_priv) */ static void batadv_bla_periodic_work(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_bla *priv_bla; struct hlist_node *node; @@ -1113,6 +1111,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct batadv_hard_iface *primary_if; int i; + delayed_work = container_of(work, struct delayed_work, work); priv_bla = container_of(delayed_work, struct batadv_priv_bla, work); bat_priv = container_of(priv_bla, struct batadv_priv, bla); primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3b4b2daa3b3e..570a8bce0364 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -190,13 +190,13 @@ out: static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct sk_buff *skb1; struct net_device *soft_iface; struct batadv_priv *bat_priv; + delayed_work = container_of(work, struct delayed_work, work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); soft_iface = forw_packet->if_incoming->soft_iface; @@ -239,11 +239,11 @@ out: void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; + delayed_work = container_of(work, struct delayed_work, work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6d44625c0f5e..03b0763abd2f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -149,6 +149,7 @@ static int batadv_interface_tx(struct sk_buff *skb, int data_len = skb->len, ret; short vid __maybe_unused = -1; bool do_bcast = false; + uint32_t seqno; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -230,8 +231,8 @@ static int batadv_interface_tx(struct sk_buff *skb, primary_if->net_dev->dev_addr, ETH_ALEN); /* set broadcast sequence number */ - bcast_packet->seqno = - htonl(atomic_inc_return(&bat_priv->bcast_seqno)); + seqno = atomic_inc_return(&bat_priv->bcast_seqno); + bcast_packet->seqno = htonl(seqno); batadv_add_bcast_packet_to_list(bat_priv, skb, 1); diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 00164645b3f7..d7904bdb1ff9 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -121,6 +121,7 @@ batadv_frag_search_packet(struct list_head *head, { struct batadv_frag_packet_list_entry *tfp; struct batadv_unicast_frag_packet *tmp_up = NULL; + int is_head_tmp, is_head; uint16_t search_seqno; if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -128,6 +129,8 @@ batadv_frag_search_packet(struct list_head *head, else search_seqno = ntohs(up->seqno)-1; + is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); + list_for_each_entry(tfp, head, list) { if (!tfp->skb) @@ -139,9 +142,8 @@ batadv_frag_search_packet(struct list_head *head, tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; if (tfp->seqno == search_seqno) { - - if ((tmp_up->flags & BATADV_UNI_FRAG_HEAD) != - (up->flags & BATADV_UNI_FRAG_HEAD)) + is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); + if (is_head_tmp != is_head) return tfp; else goto mov_tail; @@ -334,8 +336,7 @@ find_router: /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ - unicast_packet->ttvn = - (uint8_t)atomic_read(&orig_node->last_ttvn); + unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 4608c1b22d44..55cc51bcc110 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -814,12 +814,12 @@ out: /* called from timer; send (and maybe generate) vis packet. */ static void batadv_send_vis_packets(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_vis *priv_vis; struct batadv_vis_info *info; + delayed_work = container_of(work, struct delayed_work, work); priv_vis = container_of(delayed_work, struct batadv_priv_vis, work); bat_priv = container_of(priv_vis, struct batadv_priv, vis); spin_lock_bh(&bat_priv->vis.hash_lock); -- cgit v1.2.1 From c67893d17a6bbd16328a1ee38ab0cb460511014a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 8 Jul 2012 18:33:51 +0200 Subject: batman-adv: Reduce accumulated length of simple statements Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 41 +++++++++++++++++++++------------- net/batman-adv/bridge_loop_avoidance.c | 3 +-- net/batman-adv/gateway_client.c | 21 ++++++++++------- net/batman-adv/routing.c | 3 +-- net/batman-adv/translation-table.c | 10 +++++---- net/batman-adv/unicast.c | 5 +++-- net/batman-adv/vis.c | 10 ++++----- 7 files changed, 55 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 616d52d9cd0a..df79300dcb7b 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -166,13 +166,15 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, int16_t buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; + uint8_t *packet_pos; if (hard_iface->if_status != BATADV_IF_ACTIVE) return; packet_num = 0; buff_pos = 0; - batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data; + packet_pos = forw_packet->skb->data; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, @@ -187,9 +189,11 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, else batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; - fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? - "Sending own" : - "Forwarding")); + if (packet_num > 0 || !forw_packet->own) + fwd_str = "Forwarding"; + else + fwd_str = "Sending own"; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s, ttvn %d) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), @@ -204,8 +208,8 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, buff_pos += BATADV_OGM_HLEN; buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); packet_num++; - batadv_ogm_packet = (struct batadv_ogm_packet *) - (forw_packet->skb->data + buff_pos); + packet_pos = forw_packet->skb->data + buff_pos; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; } /* create clone because function is called more than once */ @@ -227,9 +231,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) struct batadv_hard_iface *primary_if = NULL; struct batadv_ogm_packet *batadv_ogm_packet; unsigned char directlink; + uint8_t *packet_pos; - batadv_ogm_packet = (struct batadv_ogm_packet *) - (forw_packet->skb->data); + packet_pos = forw_packet->skb->data; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; directlink = (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0); if (!forw_packet->if_incoming) { @@ -839,8 +844,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, spin_unlock_bh(&orig_node->ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_eq_count > neigh_rq_count ? - neigh_rq_count : orig_eq_count); + if (orig_eq_count > neigh_rq_count) + total_count = neigh_rq_count; + else + total_count = orig_eq_count; /* if we have too few packets (too less data) we set tq_own to zero * if we receive too few packets it is not considered bidirectional @@ -1168,9 +1175,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* if sender is a direct neighbor the sender mac equals * originator mac */ - orig_neigh_node = (is_single_hop_neigh ? - orig_node : - batadv_get_orig_node(bat_priv, ethhdr->h_source)); + if (is_single_hop_neigh) + orig_neigh_node = orig_node; + else + orig_neigh_node = batadv_get_orig_node(bat_priv, + ethhdr->h_source); + if (!orig_neigh_node) goto out; @@ -1256,6 +1266,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, int buff_pos = 0, packet_len; unsigned char *tt_buff, *packet_buff; bool ret; + uint8_t *packet_pos; ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); if (!ret) @@ -1286,8 +1297,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, buff_pos += BATADV_OGM_HLEN; buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); - batadv_ogm_packet = (struct batadv_ogm_packet *) - (packet_buff + buff_pos); + packet_pos = packet_buff + buff_pos; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, batadv_ogm_packet->tt_num_changes)); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 7a93b2ae3a9b..0a9084ad19a6 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1345,8 +1345,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr))) return 0; - vhdr = (struct vlan_ethhdr *)(((uint8_t *)skb->data) + - hdr_size); + vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index eef7cc739397..15d67abc10a4 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -117,10 +117,15 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; + uint32_t gw_divisor; uint8_t max_tq = 0; int down, up; + uint8_t tq_avg; struct batadv_orig_node *orig_node; + gw_divisor = BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE; + gw_divisor *= 64; + rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->deleted) @@ -134,19 +139,19 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (!atomic_inc_not_zero(&gw_node->refcount)) goto next; + tq_avg = router->tq_avg; + switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ batadv_gw_bandwidth_to_kbit(orig_node->gw_flags, &down, &up); - tmp_gw_factor = (router->tq_avg * router->tq_avg * - down * 100 * 100) / - (BATADV_TQ_LOCAL_WINDOW_SIZE * - BATADV_TQ_LOCAL_WINDOW_SIZE * 64); + tmp_gw_factor = tq_avg * tq_avg * down * 100 * 100; + tmp_gw_factor /= gw_divisor; if ((tmp_gw_factor > max_gw_factor) || ((tmp_gw_factor == max_gw_factor) && - (router->tq_avg > max_tq))) { + (tq_avg > max_tq))) { if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; @@ -161,7 +166,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) * soon as a better gateway appears which has * $routing_class more tq points) */ - if (router->tq_avg > max_tq) { + if (tq_avg > max_tq) { if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; @@ -170,8 +175,8 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) break; } - if (router->tq_avg > max_tq) - max_tq = router->tq_avg; + if (tq_avg > max_tq) + max_tq = tq_avg; if (tmp_gw_factor > max_gw_factor) max_gw_factor = tmp_gw_factor; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 4961278086bd..9f933c95dc0e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -981,8 +981,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, } else { memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); - curr_ttvn = (uint8_t) - atomic_read(&orig_node->last_ttvn); + curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); batadv_orig_node_free_ref(orig_node); } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b01049a7a912..cb429d181f4d 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1559,6 +1559,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, uint16_t tt_len, tt_tot; struct sk_buff *skb = NULL; struct batadv_tt_query_packet *tt_response; + uint8_t *packet_pos; size_t len; batadv_dbg(BATADV_DBG_TT, bat_priv, @@ -1612,8 +1613,8 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, goto unlock; skb_reserve(skb, ETH_HLEN); - tt_response = (struct batadv_tt_query_packet *)skb_put(skb, - len); + packet_pos = skb_put(skb, len); + tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; tt_response->tt_data = htons(tt_tot); @@ -1692,6 +1693,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, uint16_t tt_len, tt_tot; struct sk_buff *skb = NULL; struct batadv_tt_query_packet *tt_response; + uint8_t *packet_pos; size_t len; batadv_dbg(BATADV_DBG_TT, bat_priv, @@ -1738,8 +1740,8 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, goto unlock; skb_reserve(skb, ETH_HLEN); - tt_response = (struct batadv_tt_query_packet *)skb_put(skb, - len); + packet_pos = skb_put(skb, len); + tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; tt_response->tt_data = htons(tt_tot); diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index d7904bdb1ff9..f39723281ca1 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,6 +39,7 @@ batadv_frag_merge_packet(struct list_head *head, struct batadv_unicast_packet *unicast_packet; int hdr_len = sizeof(*unicast_packet); int uni_diff = sizeof(*up) - hdr_len; + uint8_t *packet_pos; up = (struct batadv_unicast_frag_packet *)skb->data; /* set skb to the first part and tmp_skb to the second part */ @@ -65,8 +66,8 @@ batadv_frag_merge_packet(struct list_head *head, kfree_skb(tmp_skb); memmove(skb->data + uni_diff, skb->data, hdr_len); - unicast_packet = (struct batadv_unicast_packet *)skb_pull(skb, - uni_diff); + packet_pos = skb_pull(skb, uni_diff); + unicast_packet = (struct batadv_unicast_packet *)packet_pos; unicast_packet->header.packet_type = BATADV_UNICAST; return skb; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 55cc51bcc110..5abd1454fb07 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -578,6 +578,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) struct batadv_vis_packet *packet; struct batadv_vis_info_entry *entry; struct batadv_tt_common_entry *tt_common_entry; + uint8_t *packet_pos; int best_tq = -1; uint32_t i; @@ -618,8 +619,8 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) goto next; /* fill one entry into buffer. */ - entry = (struct batadv_vis_info_entry *) - skb_put(info->skb_packet, sizeof(*entry)); + packet_pos = skb_put(info->skb_packet, sizeof(*entry)); + entry = (struct batadv_vis_info_entry *)packet_pos; memcpy(entry->src, router->if_incoming->net_dev->dev_addr, ETH_ALEN); @@ -644,9 +645,8 @@ next: rcu_read_lock(); hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - entry = (struct batadv_vis_info_entry *) - skb_put(info->skb_packet, - sizeof(*entry)); + packet_pos = skb_put(info->skb_packet, sizeof(*entry)); + entry = (struct batadv_vis_info_entry *)packet_pos; memset(entry->src, 0, ETH_ALEN); memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN); entry->quality = 0; /* 0 means TT */ -- cgit v1.2.1 From 30cfd02b60e1cb16f5effb0a01f826c5bb7e4c59 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 5 Jul 2012 23:38:29 +0200 Subject: batman-adv: detect not yet announced clients With the current TT mechanism a new client joining the network is not immediately able to communicate with other hosts because its MAC address has not been announced yet. This situation holds until the first OGM containing its joining event will be spread over the mesh network. This behaviour can be acceptable in networks where the originator interval is a small value (e.g. 1sec) but if that value is set to an higher time (e.g. 5secs) the client could suffer from several malfunctions like DHCP client timeouts, etc. This patch adds an early detection mechanism that makes nodes in the network able to recognise "not yet announced clients" by means of the broadcast packets they emitted on connection (e.g. ARP or DHCP request). The added client will then be confirmed upon receiving the OGM claiming it or purged if such OGM is not received within a fixed amount of time. Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 1 + net/batman-adv/packet.h | 1 + net/batman-adv/translation-table.c | 127 +++++++++++++++++++++++++++---------- net/batman-adv/translation-table.h | 4 +- net/batman-adv/types.h | 1 + 5 files changed, 101 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 574fca1bd434..61a0cfd3ceb4 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -43,6 +43,7 @@ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ #define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index eb4593413b73..2d23a14c220e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -85,6 +85,7 @@ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), BATADV_TT_CLIENT_ROAM = BIT(1), BATADV_TT_CLIENT_WIFI = BIT(2), + BATADV_TT_CLIENT_TEMP = BIT(3), BATADV_TT_CLIENT_NOPURGE = BIT(8), BATADV_TT_CLIENT_NEW = BIT(9), BATADV_TT_CLIENT_PENDING = BIT(10), diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cb429d181f4d..112edd371b2f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -34,6 +34,10 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, static void batadv_tt_purge(struct work_struct *work); static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry); +static void batadv_tt_global_del(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr, + const char *message, bool roaming); /* returns 1 if they are the same mac addr */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) @@ -268,6 +272,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, tt_local_entry->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local_entry->common.refcount, 2); tt_local_entry->last_seen = jiffies; + tt_local_entry->common.added_at = tt_local_entry->last_seen; /* the batman interface mac address should never be purged */ if (batadv_compare_eth(addr, soft_iface->dev_addr)) @@ -682,8 +687,13 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, struct batadv_tt_orig_list_entry *orig_entry; orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); - if (orig_entry) + if (orig_entry) { + /* refresh the ttvn: the current value could be a bogus one that + * was added during a "temporary client detection" + */ + orig_entry->ttvn = ttvn; goto out; + } orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); if (!orig_entry) @@ -729,6 +739,7 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, common->flags = flags; tt_global_entry->roam_at = 0; atomic_set(&common->refcount, 2); + common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); spin_lock_init(&tt_global_entry->list_lock); @@ -744,7 +755,19 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, goto out_remove; } } else { - /* there is already a global entry, use this one. */ + /* If there is already a global entry, we can use this one for + * our processing. + * But if we are trying to add a temporary client we can exit + * directly because the temporary information should never + * override any already known client state (whatever it is) + */ + if (flags & BATADV_TT_CLIENT_TEMP) + goto out; + + /* if the client was temporary added before receiving the first + * OGM announcing it, we have to clear the TEMP flag + */ + tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_TEMP; /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a @@ -758,9 +781,8 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = 0; } - } - /* add the new orig_entry (if needed) */ + /* add the new orig_entry (if needed) or update it */ batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); batadv_dbg(BATADV_DBG_TT, bat_priv, @@ -800,11 +822,12 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, hlist_for_each_entry_rcu(orig_entry, node, head, list) { flags = tt_common_entry->flags; last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); - seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c]\n", + seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c%c]\n", tt_global_entry->common.addr, orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), - (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.')); + (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); } } @@ -1059,49 +1082,63 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, orig_node->tt_initialised = false; } -static void batadv_tt_global_roam_purge_list(struct batadv_priv *bat_priv, - struct hlist_head *head) +static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, + char **msg) { - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_global_entry *tt_global_entry; - struct hlist_node *node, *node_tmp; + bool purge = false; + unsigned long roam_timeout = BATADV_TT_CLIENT_ROAM_TIMEOUT; + unsigned long temp_timeout = BATADV_TT_CLIENT_TEMP_TIMEOUT; - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, - hash_entry) { - tt_global_entry = container_of(tt_common_entry, - struct batadv_tt_global_entry, - common); - if (!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM)) - continue; - if (!batadv_has_timed_out(tt_global_entry->roam_at, - BATADV_TT_CLIENT_ROAM_TIMEOUT)) - continue; - - batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting global tt entry (%pM): Roaming timeout\n", - tt_global_entry->common.addr); + if ((tt_global->common.flags & BATADV_TT_CLIENT_ROAM) && + batadv_has_timed_out(tt_global->roam_at, roam_timeout)) { + purge = true; + *msg = "Roaming timeout\n"; + } - hlist_del_rcu(node); - batadv_tt_global_entry_free_ref(tt_global_entry); + if ((tt_global->common.flags & BATADV_TT_CLIENT_TEMP) && + batadv_has_timed_out(tt_global->common.added_at, temp_timeout)) { + purge = true; + *msg = "Temporary client timeout\n"; } + + return purge; } -static void batadv_tt_global_roam_purge(struct batadv_priv *bat_priv) +static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; + struct hlist_node *node, *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; + char *msg = NULL; + struct batadv_tt_common_entry *tt_common; + struct batadv_tt_global_entry *tt_global; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - batadv_tt_global_roam_purge_list(bat_priv, head); + hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hash_entry) { + tt_global = container_of(tt_common, + struct batadv_tt_global_entry, + common); + + if (!batadv_tt_global_to_purge(tt_global, &msg)) + continue; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Deleting global tt entry (%pM): %s\n", + tt_global->common.addr, msg); + + hlist_del_rcu(node); + + batadv_tt_global_entry_free_ref(tt_global); + } spin_unlock_bh(list_lock); } - } static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) @@ -1239,6 +1276,12 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, */ if (tt_common->flags & BATADV_TT_CLIENT_ROAM) continue; + /* Temporary clients have not been announced yet, so + * they have to be skipped while computing the global + * crc + */ + if (tt_common->flags & BATADV_TT_CLIENT_TEMP) + continue; /* find out if this global entry is announced by this * originator @@ -1392,7 +1435,8 @@ static int batadv_tt_global_valid(const void *entry_ptr, const struct batadv_tt_global_entry *tt_global_entry; const struct batadv_orig_node *orig_node = data_ptr; - if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM) + if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM || + tt_common_entry->flags & BATADV_TT_CLIENT_TEMP) return 0; tt_global_entry = container_of(tt_common_entry, @@ -2125,7 +2169,7 @@ static void batadv_tt_purge(struct work_struct *work) bat_priv = container_of(priv_tt, struct batadv_priv, tt); batadv_tt_local_purge(bat_priv); - batadv_tt_global_roam_purge(bat_priv); + batadv_tt_global_purge(bat_priv); batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); @@ -2399,3 +2443,22 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, out: return ret; } + +bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr) +{ + bool ret = false; + + if (!batadv_tt_global_add(bat_priv, orig_node, addr, + BATADV_TT_CLIENT_TEMP, + atomic_read(&orig_node->last_ttvn))) + goto out; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Added temporary global client (addr: %pM orig: %pM)\n", + addr, orig_node->orig); + ret = true; +out: + return ret; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ffa87355096b..811fffd4760c 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -59,6 +59,8 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv, int packet_min_len); bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, uint8_t *addr); - +bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 97c4978dee69..2ed82caacdca 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -284,6 +284,7 @@ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; struct hlist_node hash_entry; uint16_t flags; + unsigned long added_at; atomic_t refcount; struct rcu_head rcu; }; -- cgit v1.2.1 From 371351731e9c2a7d8f4b169731495cb4f0589063 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 5 Jul 2012 23:38:30 +0200 Subject: batman-adv: change interface_rx to get orig node In order to understand where a broadcast packet is coming from and use this information to detect not yet announced clients, this patch modifies the interface_rx() function by passing a new argument: the orig node corresponding to the node that originated the received packet (if known). This new argument if not NULL for broadcast packets only (other packets does not have source field). Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 10 ++++++---- net/batman-adv/soft-interface.c | 6 +++++- net/batman-adv/soft-interface.h | 5 +++-- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 9f933c95dc0e..939fc01371df 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1012,8 +1012,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(unicast_packet->dest)) { - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, - hdr_size); + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, + NULL); + return NET_RX_SUCCESS; } @@ -1050,7 +1051,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, return NET_RX_SUCCESS; batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, - sizeof(struct batadv_unicast_packet)); + sizeof(struct batadv_unicast_packet), NULL); return NET_RX_SUCCESS; } @@ -1137,7 +1138,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* broadcast for me */ - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, + orig_node); ret = NET_RX_SUCCESS; goto out; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 03b0763abd2f..7b683e0bd668 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -270,7 +270,7 @@ end: void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, - int hdr_size) + int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct ethhdr *ethhdr; @@ -322,6 +322,10 @@ void batadv_interface_rx(struct net_device *soft_iface, soft_iface->last_rx = jiffies; + if (orig_node) + batadv_tt_add_temporary_global_entry(bat_priv, orig_node, + ethhdr->h_source); + if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) goto dropped; diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 852c683b06a1..07a08fed28b9 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -21,8 +21,9 @@ #define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); -void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, - struct batadv_hard_iface *recv_if, int hdr_size); +void batadv_interface_rx(struct net_device *soft_iface, + struct sk_buff *skb, struct batadv_hard_iface *recv_if, + int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(const char *name); void batadv_softif_destroy(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); -- cgit v1.2.1 From fa4f0afcf40361bf67ed3abd520a5fbe12d11166 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 19 Aug 2012 21:48:25 +0200 Subject: batman-adv: Start new development cycle Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 61a0cfd3ceb4..d57b746219de 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2012.3.0" +#define BATADV_SOURCE_VERSION "2012.4.0" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.2.1 From 748e2d9396a18c3fd3d07d47c1b41320acf1fbf4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2012 21:50:59 +0000 Subject: net: reinstate rtnl in call_netdevice_notifiers() Eric Biederman pointed out that not holding RTNL while calling call_netdevice_notifiers() was racy. This patch is a direct transcription his feedback against commit 0115e8e30d6fc (net: remove delay at device dismantle) Thanks Eric ! Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Cc: "Eric W. Biederman" Cc: Gao feng Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++-- net/core/fib_rules.c | 3 +-- net/ipv4/devinet.c | 6 +----- net/ipv4/fib_frontend.c | 7 ++----- net/ipv6/addrconf.c | 6 +----- 5 files changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0640d2a859c6..bc857fead8c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1466,8 +1466,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - if (val != NETDEV_UNREGISTER_FINAL) - ASSERT_RTNL(); + ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -5782,7 +5781,11 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + __rtnl_unlock(); rcu_barrier(); + rtnl_lock(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5855,7 +5858,9 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); + rtnl_lock(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); + __rtnl_unlock(); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 585093755c23..ab7db83236c9 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -711,16 +711,15 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net *net = dev_net(dev); struct fib_rules_ops *ops; + ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: - ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: - ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6a5e6e4b142c..adf273f8ad2e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1147,12 +1147,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev; - - if (event == NETDEV_UNREGISTER_FINAL) - goto out; + struct in_device *in_dev = __in_dev_get_rtnl(dev); - in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fd7d9ae64f16..acdee325d972 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1050,9 +1050,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } - if (event == NETDEV_UNREGISTER_FINAL) - return NOTIFY_DONE; - in_dev = __in_dev_get_rtnl(dev); switch (event) { @@ -1064,14 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(net, -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(net, 0); break; } return NOTIFY_DONE; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e581009cb09e..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2566,14 +2566,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev; + struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; - if (event == NETDEV_UNREGISTER_FINAL) - return NOTIFY_DONE; - - idev = __in6_dev_get(dev); switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { -- cgit v1.2.1 From a0dfb2634e5671770f598cda08002d8cda66ac77 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 23 Aug 2012 19:51:21 +0800 Subject: af_packet: match_fanout_group() can be static cc: Eric Leblond Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aee7196aac36..c5c9e2a54218 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1273,7 +1273,7 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) { if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) return true; -- cgit v1.2.1 From 78df76a065ae3b5dbcb9a29912adc02f697de498 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Aug 2012 05:40:47 +0000 Subject: ipv4: take rt_uncached_lock only if needed Multicast traffic allocates dst with DST_NOCACHE, but dst is not inserted into rt_uncached_list. This slowdown multicast workloads on SMP because rt_uncached_lock is contended. Change the test before taking the lock to actually check the dst was inserted into rt_uncached_list. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c8c748ebb28..24fd4c596643 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1263,7 +1263,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - if (dst->flags & DST_NOCACHE) { + if (!list_empty(&rt->rt_uncached)) { spin_lock_bh(&rt_uncached_lock); list_del(&rt->rt_uncached); spin_unlock_bh(&rt_uncached_lock); -- cgit v1.2.1 From 3afa6d00fb4f9712fbb44b63ba31f88b6f9239fe Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 20 Aug 2012 07:59:10 +0000 Subject: cls_cgroup: Allow classifier cgroups to have their classid reset to 0 The network classifier cgroup initalizes each cgroups instance classid value to 0. However, the sock_update_classid function only updates classid's in sockets if the tasks cgroup classid is not zero, and if it differs from the current classid. The later check is to prevent cache line dirtying, but the former is detrimental, as it prevents resetting a classid for a cgroup to 0. While this is not a common action, it has administrative usefulness (if the admin wants to disable classification of a certain group temporarily for instance). Easy fix, just remove the zero check. Tested successfully by myself Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced8d6a8..116786c55fe9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1230,7 +1230,7 @@ void sock_update_classid(struct sock *sk) rcu_read_lock(); /* doing current task, which cannot vanish. */ classid = task_cls_classid(current); rcu_read_unlock(); - if (classid && classid != sk->sk_classid) + if (classid != sk->sk_classid) sk->sk_classid = classid; } EXPORT_SYMBOL(sock_update_classid); -- cgit v1.2.1 From 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Aug 2012 22:16:51 +0100 Subject: net: Set device operstate at registration time The operstate of a device is initially IF_OPER_UNKNOWN and is updated asynchronously by linkwatch after each change of carrier state reported by the driver. The default carrier state of a net device is on, and this will never be changed on drivers that do not support carrier detection, thus the operstate remains IF_OPER_UNKNOWN. For devices that do support carrier detection, the driver must set the carrier state to off initially, then poll the hardware state when the device is opened. However, we must not activate linkwatch for a unregistered device, and commit b473001 ('net: Do not fire linkwatch events until the device is registered.') ensured that we don't. But this means that the operstate for many devices that support carrier detection remains IF_OPER_UNKNOWN when it should be IF_OPER_DOWN. The same issue exists with the dormant state. The proper initialisation sequence, avoiding a race with opening of the device, is: rtnl_lock(); rc = register_netdevice(dev); if (rc) goto out_unlock; netif_carrier_off(dev); /* or netif_dormant_on(dev) */ rtnl_unlock(); but it seems silly that this should have to be repeated in so many drivers. Further, the operstate seen immediately after opening the device may still be IF_OPER_UNKNOWN due to the asynchronous nature of linkwatch. Commit 22604c8 ('net: Fix for initial link state in 2.6.28') attempted to fix this by setting the operstate synchronously, but it was reverted as it could lead to deadlock. This initialises the operstate synchronously at registration time only. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ net/core/link_watch.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bc857fead8c8..2f25d0cac51c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5648,6 +5648,8 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); + linkwatch_init_dev(dev); + dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index c3519c6d1b16..a01922219a23 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev) } +void linkwatch_init_dev(struct net_device *dev) +{ + /* Handle pre-registration link state changes */ + if (!netif_carrier_ok(dev) || netif_dormant(dev)) + rfc2863_policy(dev); +} + + static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) -- cgit v1.2.1 From 20e1db19db5d6b9e4e83021595eab0dc8f107bef Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 23 Aug 2012 02:09:11 +0000 Subject: netlink: fix possible spoofing from non-root processes Non-root user-space processes can send Netlink messages to other processes that are well-known for being subscribed to Netlink asynchronous notifications. This allows ilegitimate non-root process to send forged messages to Netlink subscribers. The userspace process usually verifies the legitimate origin in two ways: a) Socket credentials. If UID != 0, then the message comes from some ilegitimate process and the message needs to be dropped. b) Netlink portID. In general, portID == 0 means that the origin of the messages comes from the kernel. Thus, discarding any message not coming from the kernel. However, ctnetlink sets the portID in event messages that has been triggered by some user-space process, eg. conntrack utility. So other processes subscribed to ctnetlink events, eg. conntrackd, know that the event was triggered by some user-space action. Neither of the two ways to discard ilegitimate messages coming from non-root processes can help for ctnetlink. This patch adds capability validation in case that dst_pid is set in netlink_sendmsg(). This approach is aggressive since existing applications using any Netlink bus to deliver messages between two user-space processes will break. Note that the exception is NETLINK_USERSOCK, since it is reserved for netlink-to-netlink userspace communication. Still, if anyone wants that his Netlink bus allows netlink-to-netlink userspace, then they can set NL_NONROOT_SEND. However, by default, I don't think it makes sense to allow to use NETLINK_ROUTE to communicate two processes that are sending no matter what information that is not related to link/neighbouring/routing. They should be using NETLINK_USERSOCK instead for that. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1445d73533ed..527023823b5c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1373,7 +1373,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2147,6 +2148,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } -- cgit v1.2.1 From 9b361c13ceeae872161175d39f4798ee345ed10c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 23 Aug 2012 03:26:52 +0000 Subject: vlan: add helper which can be called to see if device is used by vlan also, remove unused vlan_info definition from header CC: Patrick McHardy Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8ca533c95de0..b258da88f675 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -368,3 +368,9 @@ void vlan_vids_del_by_dev(struct net_device *dev, vlan_vid_del(dev, vid_info->vid); } EXPORT_SYMBOL(vlan_vids_del_by_dev); + +bool vlan_uses_dev(const struct net_device *dev) +{ + return rtnl_dereference(dev->vlan_info) ? true : false; +} +EXPORT_SYMBOL(vlan_uses_dev); -- cgit v1.2.1 From 7c4a56fec379ac0d7754e0d4da6a7361f1a4fe64 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 23 Aug 2012 07:05:17 +0000 Subject: tcp: fix cwnd reduction for non-sack recovery The cwnd reduction in fast recovery is based on the number of packets newly delivered per ACK. For non-sack connections every DUPACK signifies a packet has been delivered, but the sender mistakenly skips counting them for cwnd reduction. The fix is to compute newly_acked_sacked after DUPACKs are accounted in sacked_out for non-sack connections. Signed-off-by: Yuchung Cheng Acked-by: Nandita Dukkipati Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85308b90df80..6e38c6c23caa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, bool is_dupack, + int prior_sacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); + int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } else do_lost = tcp_try_undo_partial(sk, pkts_acked); + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) @@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); @@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - int newly_acked_sacked = 0; bool frto_cwnd = false; /* If the ack is older than previous acks @@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); pkts_acked = prior_packets - tp->packets_out; - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) @@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3718,8 +3718,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - newly_acked_sacked = tp->sacked_out - prior_sacked; - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } -- cgit v1.2.1 From 5f2d04f1f9b52604fca6ee08a77972c0df67e082 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:13:55 +0200 Subject: ipv4: fix path MTU discovery with connection tracking IPv4 conntrack defragments incoming packet at the PRE_ROUTING hook and (in case of forwarded packets) refragments them at POST_ROUTING independent of the IP_DF flag. Refragmentation uses the dst_mtu() of the local route without caring about the original fragment sizes, thereby breaking PMTUD. This patch fixes this by keeping track of the largest received fragment with IP_DF set and generates an ICMP fragmentation required error during refragmentation if that size exceeds the MTU. Signed-off-by: Patrick McHardy Acked-by: Eric Dumazet Acked-by: David S. Miller --- net/ipv4/ip_fragment.c | 8 +++++++- net/ipv4/ip_output.c | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409c..fa6a12c51066 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -523,6 +523,10 @@ found: if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; + if (ip_hdr(skb)->frag_off & htons(IP_DF) && + skb->len + ihl > qp->q.max_size) + qp->q.max_size = skb->len + ihl; + if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) return ip_frag_reasm(qp, prev, dev); @@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = NULL; head->dev = dev; head->tstamp = qp->q.stamp; + IPCB(head)->frag_max_size = qp->q.max_size; iph = ip_hdr(head); - iph->frag_off = 0; + /* max_size != 0 implies at least one fragment had IP_DF set */ + iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c196d749daf2..a5beab1dc958 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); - if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(ip_skb_dst_mtu(skb))); -- cgit v1.2.1 From cc110922da7e902b62d18641a370fec01a9fa794 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 23 Aug 2012 21:32:43 -0300 Subject: Bluetooth: Change signature of smp_conn_security() To make it clear that it may be called from contexts that may not have any knowledge of L2CAP, we change the connection parameter, to receive a hci_conn. This also makes it clear that it is checking the security of the link. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 11 ++++++----- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/smp.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index daa149b7003c..4ea1710a4783 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1199,14 +1199,15 @@ clean: static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan *chan; + struct hci_conn *hcon = conn->hcon; BT_DBG("conn %p", conn); - if (!conn->hcon->out && conn->hcon->type == LE_LINK) + if (!hcon->out && hcon->type == LE_LINK) l2cap_le_conn_ready(conn); - if (conn->hcon->out && conn->hcon->type == LE_LINK) - smp_conn_security(conn, conn->hcon->pending_sec_level); + if (hcon->out && hcon->type == LE_LINK) + smp_conn_security(hcon, hcon->pending_sec_level); mutex_lock(&conn->chan_lock); @@ -1219,8 +1220,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) continue; } - if (conn->hcon->type == LE_LINK) { - if (smp_conn_security(conn, chan->sec_level)) + if (hcon->type == LE_LINK) { + if (smp_conn_security(hcon, chan->sec_level)) l2cap_chan_ready(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b94abd30e6f9..45cb0b0dd2c7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -615,7 +615,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch break; } - if (smp_conn_security(conn, sec.level)) + if (smp_conn_security(conn->hcon, sec.level)) break; sk->sk_state = BT_CONFIG; chan->state = BT_CONFIG; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 98ffc1b6a6fa..8c225ef349cd 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -760,9 +760,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } -int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) +int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { - struct hci_conn *hcon = conn->hcon; + struct l2cap_conn *conn = hcon->l2cap_data; struct smp_chan *smp = conn->smp_chan; __u8 authreq; -- cgit v1.2.1 From d8343f125710fb596f7a88cd756679f14f4e77b9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 23 Aug 2012 21:32:44 -0300 Subject: Bluetooth: Fix sending a HCI Authorization Request over LE links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case that the link is already in the connected state and a Pairing request arrives from the mgmt interface, hci_conn_security() would be called but it was not considering LE links. Reported-by: João Paulo Rechi Vita Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5ad7da217474..3c094e78dde9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -29,6 +29,7 @@ #include #include #include +#include static void hci_le_connect(struct hci_conn *conn) { @@ -619,6 +620,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("hcon %p", conn); + if (conn->type == LE_LINK) + return smp_conn_security(conn, sec_level); + /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) return 1; -- cgit v1.2.1 From 072a9c48600409d72aeb0d5b29fbb75861a06631 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 24 Aug 2012 21:41:11 +0000 Subject: netpoll: revert 6bdb7fe3104 and fix be_poll() instead Against -net. In the patch "netpoll: re-enable irq in poll_napi()", I tried to fix the following warning: [100718.051041] ------------[ cut here ]------------ [100718.051048] WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x7d/0xb0() (Not tainted) [100718.051049] Hardware name: ProLiant BL460c G7 ... [100718.051068] Call Trace: [100718.051073] [] ? warn_slowpath_common+0x87/0xc0 [100718.051075] [] ? warn_slowpath_null+0x1a/0x20 [100718.051077] [] ? local_bh_enable_ip+0x7d/0xb0 [100718.051080] [] ? _spin_unlock_bh+0x1b/0x20 [100718.051085] [] ? be_process_mcc+0x74/0x230 [be2net] [100718.051088] [] ? be_poll_tx_mcc+0x16c/0x290 [be2net] [100718.051090] [] ? netpoll_poll_dev+0xd6/0x490 [100718.051095] [] ? bond_poll_controller+0x75/0x80 [bonding] [100718.051097] [] ? netpoll_poll_dev+0x45/0x490 [100718.051100] [] ? ksize+0x19/0x80 [100718.051102] [] ? netpoll_send_skb_on_dev+0x157/0x240 by reenabling IRQ before calling ->poll, but it seems more problems are introduced after that patch: http://ozlabs.org/~akpm/stuff/IMG_20120824_122054.jpg http://marc.info/?l=linux-netdev&m=134563282530588&w=2 So it is safe to fix be2net driver code directly. This patch reverts the offending commit and fixes be_poll() by avoid disabling BH there, this is okay because be_poll() can be called either by poll_napi() which already disables IRQ, or by net_rx_action() which already disables BH. Reported-by: Andrew Morton Reported-by: Sylvain Munaut Cc: Sylvain Munaut Cc: Andrew Morton Cc: David Miller Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Signed-off-by: Cong Wang Tested-by: Sylvain Munaut Signed-off-by: David S. Miller --- net/core/netpoll.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 346b1eb83a1f..e4ba3e70c174 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -168,24 +168,16 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int budget = 16; - WARN_ON_ONCE(!irqs_disabled()); - list_for_each_entry(napi, &dev->napi_list, dev_list) { - local_irq_enable(); if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - rcu_read_lock_bh(); budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), napi, budget); - rcu_read_unlock_bh(); spin_unlock(&napi->poll_lock); - if (!budget) { - local_irq_disable(); + if (!budget) break; - } } - local_irq_disable(); } } -- cgit v1.2.1 From 590e3f79a21edd2e9857ac3ced25ba6b2a491ef8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 28 Aug 2012 22:05:51 +0200 Subject: ipvs: IPv6 MTU checking cleanup and bugfix Cleaning up the IPv6 MTU checking in the IPVS xmit code, by using a common helper function __mtu_check_toobig_v6(). The MTU check for tunnel mode can also use this helper as ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) is qual to skb->len. And the 'mtu' variable have been adjusted before calling helper. Notice, this also fixes a bug, as the the MTU check in ip_vs_dr_xmit_v6() were missing a check for skb_is_gso(). This bug e.g. caused issues for KVM IPVS setups, where different Segmentation Offloading techniques are utilized, between guests, via the virtio driver. This resulted in very bad performance, due to the ICMPv6 "too big" messages didn't affect the sender. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 543a554008af..67a39786b0a1 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -85,6 +85,15 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +static inline bool +__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) +{ + if (skb->len > mtu && !skb_is_gso(skb)) { + return true; /* Packet size violate MTU size */ + } + return false; +} + /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, u32 rtos, int rt_mode, __be32 *saddr) @@ -491,7 +500,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -712,7 +721,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -946,8 +955,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && - !skb_is_gso(skb)) { + /* MTU checking: Notice that 'mtu' have been adjusted before hand */ + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1113,7 +1122,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1349,7 +1358,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); -- cgit v1.2.1 From 4cdd34084d539c758d00c5dc7bf95db2e4f2bc70 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:13:58 +0200 Subject: netfilter: nf_conntrack_ipv6: improve fragmentation handling The IPv6 conntrack fragmentation currently has a couple of shortcomings. Fragmentes are collected in PREROUTING/OUTPUT, are defragmented, the defragmented packet is then passed to conntrack, the resulting conntrack information is attached to each original fragment and the fragments then continue their way through the stack. Helper invocation occurs in the POSTROUTING hook, at which point only the original fragments are available. The result of this is that fragmented packets are never passed to helpers. This patch improves the situation in the following way: - If a reassembled packet belongs to a connection that has a helper assigned, the reassembled packet is passed through the stack instead of the original fragments. - During defragmentation, the largest received fragment size is stored. On output, the packet is refragmented if required. If the largest received fragment size exceeds the outgoing MTU, a "packet too big" message is generated, thus behaving as if the original fragments were passed through the stack from an outside point of view. - The ipv6_helper() hook function can't receive fragments anymore for connections using a helper, so it is switched to use ipv6_skip_exthdr() instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the reassembled packets are passed to connection tracking helpers. The result of this is that we can properly track fragmented packets, but still generate ICMPv6 Packet too big messages if we would have before. This patch is also required as a precondition for IPv6 NAT, where NAT helpers might enlarge packets up to a point that they require fragmentation. In that case we can't generate Packet too big messages since the proper MTU can't be calculated in all cases (f.i. when changing textual representation of a variable amount of addresses), so the packet is transparently fragmented iff the original packet or fragments would have fit the outgoing MTU. IPVS parts by Jesper Dangaard Brouer . Signed-off-by: Patrick McHardy --- net/ipv6/ip6_output.c | 7 +++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 41 ++++++++++++++++++++------ net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++++-- net/netfilter/ipvs/ip_vs_xmit.c | 9 +++++- 4 files changed, 61 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5b2d63ed793e..a4f6263fddca 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu && !skb_is_gso(skb)) { + if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || + (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu)) { + if (unlikely(!skb->local_df && skb->len > mtu) || + (IP6CB(skb)->frag_max_size && + IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4794f96cf2e0..521ddca876f8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; - unsigned char pnum = ipv6_hdr(skb)->nexthdr; - + unsigned int ret; + __be16 frag_off; + int protoff; + u8 nexthdr; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, - skb->len - extoff); - if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { + nexthdr = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } @@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum, static unsigned int __ipv6_conntrack_in(struct net *net, unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; + const struct nf_conn_help *help; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net, if (ret != NF_ACCEPT) return ret; } + + /* Conntrack helpers need the entire reassembled packet in the + * POST_ROUTING hook. + */ + ct = nf_ct_get(reasm, &ctinfo); + if (ct != NULL && !nf_ct_is_untracked(ct)) { + help = nfct_help(ct); + if (help && help->helper) { + nf_conntrack_get_reasm(skb); + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, + (struct net_device *)in, + (struct net_device *)out, + okfn, NF_IP6_PRI_CONNTRACK + 1); + return NF_DROP_ERR(-ECANCELED); + } + } + nf_conntrack_get(reasm->nfct); skb->nfct = reasm->nfct; skb->nfctinfo = reasm->nfctinfo; @@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c9c78c2e666b..f94fb3ac2a79 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + unsigned int payload_len; int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { @@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, goto err; } + payload_len = ntohs(ipv6_hdr(skb)->payload_len); + offset = ntohs(fhdr->frag_off) & ~0x7; - end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - + end = offset + (payload_len - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { @@ -307,6 +310,8 @@ found: skb->dev = NULL; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + if (payload_len > fq->q.max_size) + fq->q.max_size = payload_len; atomic_add(skb->truesize, &nf_init_frags.mem); /* The first fragment. @@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) } atomic_sub(head->truesize, &nf_init_frags.mem); + head->local_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { struct sk_buff *s, *s2; + unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { nf_conntrack_put_reasm(s->nfct_reasm); @@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, s2 = s->next; s->next = NULL; - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + if (ret != -ECANCELED) + ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, + in, out, okfn, + NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + else + kfree_skb(s); + s = s2; } nf_conntrack_put_reasm(skb); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 67a39786b0a1..56f6d5d81a77 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) static inline bool __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) { - if (skb->len > mtu && !skb_is_gso(skb)) { + if (IP6CB(skb)->frag_max_size) { + /* frag_max_size tell us that, this packet have been + * defragmented by netfilter IPv6 conntrack module. + */ + if (IP6CB(skb)->frag_max_size > mtu) + return true; /* largest fragment violate MTU */ + } + else if (skb->len > mtu && !skb_is_gso(skb)) { return true; /* Packet size violate MTU size */ } return false; -- cgit v1.2.1 From 2b60af017880f7dc35d1fac65f48fc94f8a3c1ec Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:13:59 +0200 Subject: netfilter: nf_conntrack_ipv6: fix tracking of ICMPv6 error messages containing fragments ICMPv6 error messages are tracked by extracting the conntrack tuple of the inner packet and looking up the corresponding conntrack entry. Tuple extraction uses the ->get_l4proto() callback, which in case of fragments returns NEXTHDR_FRAGMENT instead of the upper protocol, even for the first fragment when the entire next header is present, resulting in a failure to find the correct connection tracking entry. This patch changes ipv6_get_l4proto() to use ipv6_skip_exthdr() instead of nf_ct_ipv6_skip_exthdr() in order to skip fragment headers when the fragment offset is zero. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 63 +++----------------------- 1 file changed, 6 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 521ddca876f8..dcf6010f68d9 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -64,82 +64,31 @@ static int ipv6_print_tuple(struct seq_file *s, tuple->src.u3.ip6, tuple->dst.u3.ip6); } -/* - * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c - * - * This function parses (probably truncated) exthdr set "hdr" - * of length "len". "nexthdrp" initially points to some place, - * where type of the first header can be found. - * - * It skips all well-known exthdrs, and returns pointer to the start - * of unparsable area i.e. the first header with unknown type. - * if success, *nexthdr is updated by type/protocol of this header. - * - * NOTES: - it may return pointer pointing beyond end of packet, - * if the last recognized header is truncated in the middle. - * - if packet is truncated, so that all parsed headers are skipped, - * it returns -1. - * - if packet is fragmented, return pointer of the fragment header. - * - ESP is unparsable for now and considered like - * normal payload protocol. - * - Note also special handling of AUTH header. Thanks to IPsec wizards. - */ - -static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, - u8 *nexthdrp, int len) -{ - u8 nexthdr = *nexthdrp; - - while (ipv6_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr hdr; - int hdrlen; - - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return -1; - if (nexthdr == NEXTHDR_NONE) - break; - if (nexthdr == NEXTHDR_FRAGMENT) - break; - if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) - BUG(); - if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hdr.hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(&hdr); - - nexthdr = hdr.nexthdr; - len -= hdrlen; - start += hdrlen; - } - - *nexthdrp = nexthdr; - return start; -} - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { unsigned int extoff = nhoff + sizeof(struct ipv6hdr); - unsigned char pnum; + __be16 frag_off; int protoff; + u8 nexthdr; if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), - &pnum, sizeof(pnum)) != 0) { + &nexthdr, sizeof(nexthdr)) != 0) { pr_debug("ip6_conntrack_core: can't get nexthdr\n"); return -NF_ACCEPT; } - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. - YK */ - if ((protoff < 0) || (protoff > skb->len)) { + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } *dataoff = protoff; - *protonum = pnum; + *protonum = nexthdr; return NF_ACCEPT; } -- cgit v1.2.1 From 811927ccfe90fbfcfff5253ba7f95057f6cae692 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:01 +0200 Subject: netfilter: nf_conntrack: restrict NAT helper invocation to IPv4 The NAT helpers currently only handle IPv4 packets correctly. Restrict invocation of the helpers to IPv4 in preparation of IPv6 NAT. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_amanda.c | 3 ++- net/netfilter/nf_conntrack_ftp.c | 3 ++- net/netfilter/nf_conntrack_h323_main.c | 41 ++++++++++++++++++++++++---------- net/netfilter/nf_conntrack_irc.c | 3 ++- net/netfilter/nf_conntrack_sip.c | 18 ++++++++++----- net/netfilter/nf_conntrack_tftp.c | 3 ++- 6 files changed, 49 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index f2de8c55ac50..184c0dc6e437 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -154,7 +154,8 @@ static int amanda_help(struct sk_buff *skb, IPPROTO_TCP, NULL, &port); nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); - if (nf_nat_amanda && ct->status & IPS_NAT_MASK) + if (nf_nat_amanda && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 4bb771d1f57a..3e1587e63c03 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -487,7 +487,8 @@ static int help(struct sk_buff *skb, /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); - if (nf_nat_ftp && ct->status & IPS_NAT_MASK) + if (nf_nat_ftp && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp); else { diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 4283b207e63b..517c5e3fe7c6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -295,6 +295,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, @@ -353,6 +354,7 @@ static int expect_t120(struct sk_buff *skb, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_t120 = rcu_dereference(nat_t120_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, @@ -688,6 +690,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_h245 = rcu_dereference(nat_h245_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, @@ -811,6 +814,7 @@ static int expect_callforwarding(struct sk_buff *skb, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, @@ -852,7 +856,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((setup->options & eSetup_UUIE_destCallSignalAddress) && - (set_h225_addr) && ct->status & IPS_NAT_MASK && + (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { @@ -868,7 +873,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, } if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) && - (set_h225_addr) && ct->status & IPS_NAT_MASK && + (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { @@ -1278,7 +1284,8 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nat_q931 = rcu_dereference(nat_q931_hook); - if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ + if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { /* Need NAT */ ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -1306,7 +1313,8 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: GRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) /* NATed */ return set_ras_addr(skb, ct, ctinfo, data, &grq->rasAddress, 1); return 0; @@ -1374,7 +1382,8 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, return -1; set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) { + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { ret = set_ras_addr(skb, ct, ctinfo, data, rrq->rasAddress.item, rrq->rasAddress.count); @@ -1405,7 +1414,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: RCF\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { ret = set_sig_addr(skb, ct, ctinfo, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); @@ -1453,7 +1463,8 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: URQ\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { ret = set_sig_addr(skb, ct, ctinfo, data, urq->callSignalAddress.item, urq->callSignalAddress.count); @@ -1491,6 +1502,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && port == info->sig_port[dir] && + nf_ct_l3num(ct) == NFPROTO_IPV4 && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ return set_h225_addr(skb, data, 0, @@ -1503,7 +1515,8 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &arq->srcCallSignalAddress, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && - set_h225_addr && ct->status & IPS_NAT_MASK) { + set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { /* Calling ARQ */ return set_h225_addr(skb, data, 0, &arq->srcCallSignalAddress, @@ -1535,7 +1548,8 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) { /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) return set_sig_addr(skb, ct, ctinfo, data, &acf->destCallSignalAddress, 1); return 0; @@ -1571,7 +1585,8 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: LRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) return set_ras_addr(skb, ct, ctinfo, data, &lrq->replyAddress, 1); return 0; @@ -1628,7 +1643,8 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: IRR\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) { + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { ret = set_ras_addr(skb, ct, ctinfo, data, &irr->rasAddress, 1); if (ret < 0) @@ -1636,7 +1652,8 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, } set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { ret = set_sig_addr(skb, ct, ctinfo, data, irr->callSignalAddress.item, irr->callSignalAddress.count); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 009c52cfd1ec..e06dc2fab19f 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -204,7 +204,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, IPPROTO_TCP, NULL, &port); nf_nat_irc = rcu_dereference(nf_nat_irc_hook); - if (nf_nat_irc && ct->status & IPS_NAT_MASK) + if (nf_nat_irc && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_irc(skb, ctinfo, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 5c0a112aeee6..d08e0baf4640 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -981,7 +981,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, IPPROTO_UDP, NULL, &rtcp_port); nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) + if (nf_nat_sdp_media && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK && !direct_rtp) ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); @@ -1104,7 +1105,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, return ret; /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { + if (maddr_len && nf_nat_sdp_addr && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, mediaoff, c_hdr, SDP_HDR_MEDIA, &rtp_addr); @@ -1116,7 +1118,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) + if (nf_nat_sdp_session && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1275,7 +1278,8 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) + if (nf_nat_sip_expect && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, matchoff, matchlen); else { @@ -1453,7 +1457,8 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, else ret = process_sip_response(skb, dataoff, dptr, datalen); - if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) ret = NF_DROP; @@ -1534,7 +1539,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, datalen = datalen + diff - msglen; } - if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); if (nf_nat_sip_seq_adjust) nf_nat_sip_seq_adjust(skb, tdiff); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 81fc61c05263..9363e1c66466 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -72,7 +72,8 @@ static int tftp_help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); - if (nf_nat_tftp && ct->status & IPS_NAT_MASK) + if (nf_nat_tftp && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(skb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; -- cgit v1.2.1 From 051966c0c644a1c96092d4206e00704ade813c9a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:04 +0200 Subject: netfilter: nf_nat: add protoff argument to packet mangling functions For mangling IPv6 packets the protocol header offset needs to be known by the NAT packet mangling functions. Add a so far unused protoff argument and convert the conntrack and NAT helpers to use it in preparation of IPv6 NAT. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +- net/ipv4/netfilter/nf_nat_amanda.c | 3 +- net/ipv4/netfilter/nf_nat_ftp.c | 3 +- net/ipv4/netfilter/nf_nat_h323.c | 48 ++++--- net/ipv4/netfilter/nf_nat_helper.c | 9 +- net/ipv4/netfilter/nf_nat_irc.c | 3 +- net/ipv4/netfilter/nf_nat_pptp.c | 6 +- net/ipv4/netfilter/nf_nat_sip.c | 96 +++++++------ net/netfilter/ipvs/ip_vs_ftp.c | 1 + net/netfilter/nf_conntrack_amanda.c | 5 +- net/netfilter/nf_conntrack_ftp.c | 3 +- net/netfilter/nf_conntrack_h323_main.c | 191 +++++++++++++++---------- net/netfilter/nf_conntrack_irc.c | 3 +- net/netfilter/nf_conntrack_pptp.c | 18 +-- net/netfilter/nf_conntrack_sip.c | 95 +++++++----- 15 files changed, 295 insertions(+), 195 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index e7ff2dcab6ce..4ada3295d9a7 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -31,7 +31,8 @@ int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo); + enum ip_conntrack_info ctinfo, + unsigned int protoff); EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -149,7 +150,8 @@ static unsigned int ipv4_confirm(unsigned int hooknum, typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 3c04d24e2976..75464b62f5f2 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -26,6 +26,7 @@ MODULE_ALIAS("ip_nat_amanda"); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -61,7 +62,7 @@ static unsigned int help(struct sk_buff *skb, sprintf(buffer, "%u", port); ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e462a957d080..5589f3af4a8e 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -55,6 +55,7 @@ static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, static unsigned int nf_nat_ftp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -100,7 +101,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, matchlen, buffer, buflen)) goto out; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c6784a18c1c4..d2c228db38b5 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -21,7 +21,7 @@ #include /****************************************************************************/ -static int set_addr(struct sk_buff *skb, +static int set_addr(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { @@ -40,7 +40,7 @@ static int set_addr(struct sk_buff *skb, if (ip_hdr(skb)->protocol == IPPROTO_TCP) { if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - addroff, sizeof(buf), + protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); return -1; @@ -54,7 +54,7 @@ static int set_addr(struct sk_buff *skb, *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - addroff, sizeof(buf), + protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); return -1; @@ -69,22 +69,22 @@ static int set_addr(struct sk_buff *skb, } /****************************************************************************/ -static int set_h225_addr(struct sk_buff *skb, +static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) { - return set_addr(skb, data, dataoff, taddr->ipAddress.ip, + return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff *skb, +static int set_h245_addr(struct sk_buff *skb, unsigned protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) { - return set_addr(skb, data, dataoff, + return set_addr(skb, protoff, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } @@ -92,7 +92,7 @@ static int set_h245_addr(struct sk_buff *skb, /****************************************************************************/ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { const struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -118,7 +118,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, port, &ct->tuplehash[!dir].tuple.dst.u3.ip, info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, info->sig_port[!dir]); @@ -129,7 +130,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, port, &ct->tuplehash[!dir].tuple.src.u3.ip, info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -143,7 +145,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { int dir = CTINFO2DIR(ctinfo); @@ -159,7 +161,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3.ip, ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -172,7 +174,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, @@ -244,7 +246,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : nated_port)) == 0) { @@ -275,7 +277,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { @@ -307,7 +309,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -326,7 +328,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { @@ -363,7 +365,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, dataoff, taddr, + if (set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -416,7 +418,8 @@ static void ip_nat_q931_expect(struct nf_conn *new, /****************************************************************************/ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, TransportAddress *taddr, int idx, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -453,7 +456,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, 0, &taddr[idx], + if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -464,7 +467,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(skb, data, 0, &taddr[0], + set_h225_addr(skb, protoff, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -507,6 +510,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new, /****************************************************************************/ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) @@ -541,7 +545,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(skb, data, dataoff, taddr, + if (!set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 2e59ad0b90ca..2fefec5e757c 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -206,6 +206,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, @@ -257,6 +258,7 @@ int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, @@ -387,7 +389,8 @@ nf_nat_sack_adjust(struct sk_buff *skb, int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) + enum ip_conntrack_info ctinfo, + unsigned int protoff) { struct tcphdr *tcph; int dir; @@ -401,10 +404,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) return 0; - tcph = (void *)skb->data + ip_hdrlen(skb); + tcph = (void *)skb->data + protoff; if (after(ntohl(tcph->seq), this_way->correction_pos)) seqoff = this_way->offset_after; else diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 979ae165f4ef..5b0c20a1f08d 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -29,6 +29,7 @@ MODULE_ALIAS("ip_nat_irc"); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -66,7 +67,7 @@ static unsigned int help(struct sk_buff *skb, buffer, &ip, port); ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, - matchoff, matchlen, buffer, + protoff, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 388140881ebe..31ef890d894b 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -113,6 +113,7 @@ static int pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) @@ -175,7 +176,7 @@ pptp_outbound_pkt(struct sk_buff *skb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -216,6 +217,7 @@ static int pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) { @@ -268,7 +270,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4ad9cf173992..df626af8413c 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -30,7 +30,8 @@ MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, const char *buffer, unsigned int buflen) @@ -46,7 +47,7 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, matchoff += dataoff - baseoff; if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, buflen, false)) return 0; } else { @@ -54,7 +55,7 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, matchoff += dataoff - baseoff; if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, buflen)) return 0; } @@ -65,7 +66,8 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, return 1; } -static int map_addr(struct sk_buff *skb, unsigned int dataoff, +static int map_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, union nf_inet_addr *addr, __be16 port) @@ -94,11 +96,12 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff, buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); } -static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, +static int map_sip_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, enum sip_header_types type) { @@ -111,11 +114,12 @@ static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port); + return map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port); } -static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -132,8 +136,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && - !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + !map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; request = 1; } else @@ -164,8 +168,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, } olen = *datalen; - if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; matchend = matchoff + matchlen + *datalen - olen; @@ -179,7 +183,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, dataoff, dptr, datalen, + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -193,7 +197,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, dataoff, dptr, datalen, + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -207,7 +211,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, dataoff, dptr, datalen, + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -221,13 +225,14 @@ next: SIP_HDR_CONTACT, &in_header, &matchoff, &matchlen, &addr, &port) > 0) { - if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) + if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; return NF_ACCEPT; @@ -272,7 +277,8 @@ static void ip_nat_sip_expected(struct nf_conn *ct, } } -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, @@ -326,7 +332,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, dataoff, dptr, datalen, + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) goto err; } @@ -337,7 +343,8 @@ err: return NF_DROP; } -static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, +static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -359,11 +366,12 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, return 0; buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); } -static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, enum sdp_header_types type, @@ -377,11 +385,12 @@ static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return -ENOENT; - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen) ? 0 : -EINVAL; + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; } -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, enum sdp_header_types type, @@ -392,14 +401,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, unsigned int buflen; buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, - buffer, buflen)) + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, + sdpoff, type, term, buffer, buflen)) return 0; - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, @@ -409,14 +419,15 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, unsigned int buflen; buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen)) + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) return 0; - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, const union nf_inet_addr *addr) @@ -426,12 +437,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, + switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, buffer, buflen)) { case 0: @@ -448,12 +459,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff return 0; } - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, +static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp, @@ -514,7 +526,7 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, /* Update media port. */ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, dataoff, dptr, datalen, + !ip_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, port)) goto err2; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index ad70b7e4ac4a..4f53a5f04437 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -268,6 +268,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * packet. */ ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + iph->ihl * 4, start-data, end-start, buf, buf_len); if (ret) { diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 184c0dc6e437..e0212b5494b1 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -40,6 +40,7 @@ MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -156,8 +157,8 @@ static int amanda_help(struct sk_buff *skb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(skb, ctinfo, off - dataoff, - len, exp); + ret = nf_nat_amanda(skb, ctinfo, protoff, + off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 3e1587e63c03..c0f4a5ba9016 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -48,6 +48,7 @@ module_param(loose, bool, 0600); unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp); @@ -490,7 +491,7 @@ static int help(struct sk_buff *skb, if (nf_nat_ftp && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, - matchoff, matchlen, exp); + protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ if (nf_ct_expect_related(exp) != 0) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 517c5e3fe7c6..1b30b0dee708 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -49,12 +49,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff *skb, +int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff *skb, +int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) @@ -62,16 +62,17 @@ int (*set_h225_addr_hook) (struct sk_buff *skb, int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, __be16 rtp_port, @@ -80,24 +81,28 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; @@ -251,6 +256,7 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, /****************************************************************************/ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { @@ -298,7 +304,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -325,6 +331,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { @@ -357,7 +364,7 @@ static int expect_t120(struct sk_buff *skb, nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -376,6 +383,7 @@ static int expect_t120(struct sk_buff *skb, static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H2250LogicalChannelParameters *channel) { @@ -383,7 +391,7 @@ static int process_h245_channel(struct sk_buff *skb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -392,7 +400,7 @@ static int process_h245_channel(struct sk_buff *skb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -404,6 +412,7 @@ static int process_h245_channel(struct sk_buff *skb, /****************************************************************************/ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, OpenLogicalChannel *olc) { @@ -414,7 +423,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -432,7 +442,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(skb, ct, ctinfo, data, dataoff, + process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. @@ -450,7 +461,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -463,7 +474,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) { H2250LogicalChannelAckParameters *ack; @@ -479,7 +490,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. @@ -498,7 +510,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, + protoff, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -507,7 +520,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, + protoff, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -517,7 +531,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff, &olca->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -530,14 +544,15 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) { switch (mscm->choice) { case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(skb, ct, ctinfo, data, dataoff, + return process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -546,7 +561,8 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(skb, ct, ctinfo, data, dataoff, + return process_olca(skb, ct, ctinfo, + protoff, data, dataoff, &mscm->response. openLogicalChannelAck); } @@ -597,7 +613,8 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(skb, ct, ctinfo, protoff, + &data, dataoff, &mscm) < 0) goto drop; } @@ -661,7 +678,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, /****************************************************************************/ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr) { int dir = CTINFO2DIR(ctinfo); @@ -693,7 +710,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -779,6 +796,7 @@ static int callforward_do_filter(const union nf_inet_addr *src, static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr) { @@ -817,7 +835,8 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(skb, ct, ctinfo, + protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -835,6 +854,7 @@ static int expect_callforwarding(struct sk_buff *skb, /****************************************************************************/ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Setup_UUIE *setup) { @@ -848,7 +868,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -864,7 +884,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(skb, protoff, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -881,7 +901,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(skb, protoff, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -891,7 +911,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -905,6 +926,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, CallProceeding_UUIE *callproc) { @@ -914,7 +936,7 @@ static int process_callproceeding(struct sk_buff *skb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -922,7 +944,8 @@ static int process_callproceeding(struct sk_buff *skb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -935,6 +958,7 @@ static int process_callproceeding(struct sk_buff *skb, /****************************************************************************/ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Connect_UUIE *connect) { @@ -944,7 +968,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -952,7 +976,8 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -965,6 +990,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Alerting_UUIE *alert) { @@ -974,7 +1000,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -982,7 +1008,8 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -995,6 +1022,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Facility_UUIE *facility) { @@ -1005,15 +1033,15 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(skb, ct, ctinfo, data, - dataoff, + return expect_callforwarding(skb, ct, ctinfo, + protoff, data, dataoff, &facility-> alternativeAddress); return 0; } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1021,7 +1049,8 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1034,6 +1063,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Progress_UUIE *progress) { @@ -1043,7 +1073,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1051,7 +1081,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1064,7 +1095,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, Q931 *q931) + unsigned int protoff, unsigned char **data, int dataoff, + Q931 *q931) { H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu; int i; @@ -1072,28 +1104,29 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(skb, ct, ctinfo, data, dataoff, + ret = process_setup(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(skb, ct, ctinfo, + protoff, data, dataoff, &pdu->h323_message_body. callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(skb, ct, ctinfo, data, dataoff, + ret = process_connect(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(skb, ct, ctinfo, data, dataoff, + ret = process_alerting(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(skb, ct, ctinfo, data, dataoff, + ret = process_facility(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(skb, ct, ctinfo, data, dataoff, + ret = process_progress(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1107,7 +1140,8 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(skb, ct, ctinfo, data, dataoff, + ret = process_h245(skb, ct, ctinfo, + protoff, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1152,7 +1186,8 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(skb, ct, ctinfo, protoff, + &data, dataoff, &q931) < 0) goto drop; } @@ -1249,7 +1284,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, /****************************************************************************/ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1286,7 +1321,8 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, nat_q931 = rcu_dereference(nat_q931_hook); if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); + ret = nat_q931(skb, ct, ctinfo, protoff, data, + taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1306,6 +1342,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, GatekeeperRequest *grq) { typeof(set_ras_addr_hook) set_ras_addr; @@ -1315,7 +1352,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(skb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, protoff, data, &grq->rasAddress, 1); return 0; } @@ -1323,6 +1360,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, GatekeeperConfirm *gcf) { int dir = CTINFO2DIR(ctinfo); @@ -1367,6 +1405,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RegistrationRequest *rrq) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1375,7 +1414,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(skb, ct, ctinfo, data, + ret = expect_q931(skb, ct, ctinfo, protoff, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) @@ -1384,7 +1423,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, protoff, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1403,6 +1442,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RegistrationConfirm *rcf) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1416,7 +1456,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1453,6 +1493,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, UnregistrationRequest *urq) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1465,7 +1506,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1486,6 +1527,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, AdmissionRequest *arq) { const struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1505,7 +1547,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, nf_ct_l3num(ct) == NFPROTO_IPV4 && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(skb, protoff, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1518,7 +1560,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(skb, protoff, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1530,6 +1572,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, AdmissionConfirm *acf) { int dir = CTINFO2DIR(ctinfo); @@ -1550,7 +1593,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - return set_sig_addr(skb, ct, ctinfo, data, + return set_sig_addr(skb, ct, ctinfo, protoff, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1578,6 +1621,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, LocationRequest *lrq) { typeof(set_ras_addr_hook) set_ras_addr; @@ -1587,7 +1631,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - return set_ras_addr(skb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, protoff, data, &lrq->replyAddress, 1); return 0; } @@ -1595,6 +1639,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, LocationConfirm *lcf) { int dir = CTINFO2DIR(ctinfo); @@ -1634,6 +1679,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, InfoRequestResponse *irr) { int ret; @@ -1645,7 +1691,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, protoff, data, &irr->rasAddress, 1); if (ret < 0) return -1; @@ -1654,7 +1700,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1667,38 +1713,39 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(skb, ct, ctinfo, data, + return process_grq(skb, ct, ctinfo, protoff, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(skb, ct, ctinfo, data, + return process_gcf(skb, ct, ctinfo, protoff, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(skb, ct, ctinfo, data, + return process_rrq(skb, ct, ctinfo, protoff, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(skb, ct, ctinfo, data, + return process_rcf(skb, ct, ctinfo, protoff, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(skb, ct, ctinfo, data, + return process_urq(skb, ct, ctinfo, protoff, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(skb, ct, ctinfo, data, + return process_arq(skb, ct, ctinfo, protoff, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(skb, ct, ctinfo, data, + return process_acf(skb, ct, ctinfo, protoff, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(skb, ct, ctinfo, data, + return process_lrq(skb, ct, ctinfo, protoff, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(skb, ct, ctinfo, data, + return process_lcf(skb, ct, ctinfo, protoff, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(skb, ct, ctinfo, data, + return process_irr(skb, ct, ctinfo, protoff, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1738,7 +1785,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(skb, ct, ctinfo, protoff, &data, &ras) < 0) goto drop; accept: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index e06dc2fab19f..95d097cdb202 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -33,6 +33,7 @@ static DEFINE_SPINLOCK(irc_buffer_lock); unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) __read_mostly; @@ -206,7 +207,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(skb, ctinfo, + ret = nf_nat_irc(skb, ctinfo, protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 6fed9ec35248..cc7669ef0b95 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -45,14 +45,14 @@ static DEFINE_SPINLOCK(nf_pptp_lock); int (*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int (*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound); @@ -262,7 +262,7 @@ out_unexpect_orig: } static inline int -pptp_inbound_pkt(struct sk_buff *skb, +pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -376,7 +376,8 @@ pptp_inbound_pkt(struct sk_buff *skb, nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(skb, ct, ctinfo, + protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -389,7 +390,7 @@ invalid: } static inline int -pptp_outbound_pkt(struct sk_buff *skb, +pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -471,7 +472,8 @@ pptp_outbound_pkt(struct sk_buff *skb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(skb, ct, ctinfo, + protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -570,11 +572,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d08e0baf4640..590f0abaab8c 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,8 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, - const char **dptr, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); @@ -61,6 +61,7 @@ void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen, @@ -69,7 +70,8 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, @@ -79,7 +81,8 @@ unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, @@ -88,6 +91,7 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, + unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen, @@ -96,7 +100,8 @@ unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, @@ -883,7 +888,8 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, +static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, @@ -960,7 +966,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, if (direct_rtp) { nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, dataoff, dptr, datalen, + !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -983,7 +989,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); if (nf_nat_sdp_media && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, + ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { @@ -1024,7 +1030,8 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr, return NULL; } -static int process_sdp(struct sk_buff *skb, unsigned int dataoff, +static int process_sdp(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1098,7 +1105,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, else return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen, + ret = set_expected_rtp_rtcp(skb, protoff, dataoff, + dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); if (ret != NF_ACCEPT) @@ -1107,7 +1115,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, + ret = nf_nat_sdp_addr(skb, protoff, dataoff, + dptr, datalen, mediaoff, c_hdr, SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) @@ -1120,12 +1129,13 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); if (nf_nat_sdp_session && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, - &rtp_addr); + ret = nf_nat_sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, &rtp_addr); return ret; } -static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, +static int process_invite_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1135,13 +1145,14 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_update_response(struct sk_buff *skb, unsigned int dataoff, +static int process_update_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1151,13 +1162,14 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, +static int process_prack_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1167,13 +1179,14 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, +static int process_invite_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1183,13 +1196,14 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, unsigned int ret; flush_expectations(ct, true); - ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + ret = process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); if (ret == NF_ACCEPT) ct_sip_info->invite_cseq = cseq; return ret; } -static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, +static int process_bye_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1204,7 +1218,8 @@ static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, * signalling connections. The expectation is marked inactive and is activated * when receiving a response indicating success from the registrar. */ -static int process_register_request(struct sk_buff *skb, unsigned int dataoff, +static int process_register_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1280,8 +1295,8 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); if (nf_nat_sip_expect && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, - matchoff, matchlen); + ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -1296,7 +1311,8 @@ store_cseq: return ret; } -static int process_register_response(struct sk_buff *skb, unsigned int dataoff, +static int process_register_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1378,7 +1394,8 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; -static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, +static int process_sip_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1409,13 +1426,14 @@ static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, if (*datalen < matchend + handler->len || strnicmp(*dptr + matchend, handler->method, handler->len)) continue; - return handler->response(skb, dataoff, dptr, datalen, + return handler->response(skb, protoff, dataoff, dptr, datalen, cseq, code); } return NF_ACCEPT; } -static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, +static int process_sip_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1440,27 +1458,29 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, if (!cseq) return NF_DROP; - return handler->request(skb, dataoff, dptr, datalen, cseq); + return handler->request(skb, protoff, dataoff, dptr, datalen, + cseq); } return NF_ACCEPT; } static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) + unsigned int protoff, unsigned int dataoff, + const char **dptr, unsigned int *datalen) { typeof(nf_nat_sip_hook) nf_nat_sip; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, dataoff, dptr, datalen); + ret = process_sip_request(skb, protoff, dataoff, dptr, datalen); else - ret = process_sip_response(skb, dataoff, dptr, datalen); + ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) + if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, + dptr, datalen)) ret = NF_DROP; } @@ -1528,7 +1548,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (msglen > datalen) return NF_DROP; - ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); + ret = process_sip_msg(skb, ct, protoff, dataoff, + &dptr, &msglen); if (ret != NF_ACCEPT) break; diff = msglen - origlen; @@ -1570,7 +1591,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - return process_sip_msg(skb, ct, dataoff, &dptr, &datalen); + return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen); } static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; -- cgit v1.2.1 From c7232c9979cba684c50b64c513c4a83c9aa70563 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:06 +0200 Subject: netfilter: add protocol independent NAT core Convert the IPv4 NAT implementation to a protocol independent core and address family specific modules. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter.c | 37 -- net/ipv4/netfilter/Kconfig | 64 +- net/ipv4/netfilter/Makefile | 11 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 15 +- net/ipv4/netfilter/ipt_NETMAP.c | 15 +- net/ipv4/netfilter/ipt_REDIRECT.c | 15 +- net/ipv4/netfilter/iptable_nat.c | 320 +++++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 - net/ipv4/netfilter/nf_nat_amanda.c | 1 - net/ipv4/netfilter/nf_nat_core.c | 763 ---------------------- net/ipv4/netfilter/nf_nat_ftp.c | 1 - net/ipv4/netfilter/nf_nat_h323.c | 23 +- net/ipv4/netfilter/nf_nat_helper.c | 461 ------------- net/ipv4/netfilter/nf_nat_irc.c | 1 - net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 281 ++++++++ net/ipv4/netfilter/nf_nat_pptp.c | 15 +- net/ipv4/netfilter/nf_nat_proto_common.c | 114 ---- net/ipv4/netfilter/nf_nat_proto_dccp.c | 106 --- net/ipv4/netfilter/nf_nat_proto_gre.c | 30 +- net/ipv4/netfilter/nf_nat_proto_icmp.c | 24 +- net/ipv4/netfilter/nf_nat_proto_sctp.c | 96 --- net/ipv4/netfilter/nf_nat_proto_tcp.c | 91 --- net/ipv4/netfilter/nf_nat_proto_udp.c | 82 --- net/ipv4/netfilter/nf_nat_proto_udplite.c | 98 --- net/ipv4/netfilter/nf_nat_proto_unknown.c | 52 -- net/ipv4/netfilter/nf_nat_rule.c | 214 ------- net/ipv4/netfilter/nf_nat_sip.c | 19 +- net/ipv4/netfilter/nf_nat_standalone.c | 326 ---------- net/ipv4/netfilter/nf_nat_tftp.c | 1 - net/netfilter/Kconfig | 24 + net/netfilter/Makefile | 11 + net/netfilter/core.c | 5 + net/netfilter/nf_conntrack_core.c | 6 + net/netfilter/nf_conntrack_netlink.c | 35 +- net/netfilter/nf_conntrack_proto_tcp.c | 8 +- net/netfilter/nf_conntrack_sip.c | 6 +- net/netfilter/nf_nat_core.c | 854 +++++++++++++++++++++++++ net/netfilter/nf_nat_helper.c | 435 +++++++++++++ net/netfilter/nf_nat_proto_common.c | 112 ++++ net/netfilter/nf_nat_proto_dccp.c | 116 ++++ net/netfilter/nf_nat_proto_sctp.c | 103 +++ net/netfilter/nf_nat_proto_tcp.c | 85 +++ net/netfilter/nf_nat_proto_udp.c | 76 +++ net/netfilter/nf_nat_proto_udplite.c | 106 +++ net/netfilter/nf_nat_proto_unknown.c | 54 ++ net/netfilter/xt_nat.c | 167 +++++ 46 files changed, 2899 insertions(+), 2586 deletions(-) create mode 100644 net/ipv4/netfilter/iptable_nat.c delete mode 100644 net/ipv4/netfilter/nf_nat_core.c delete mode 100644 net/ipv4/netfilter/nf_nat_helper.c create mode 100644 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_common.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_dccp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_sctp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_tcp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udplite.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_unknown.c delete mode 100644 net/ipv4/netfilter/nf_nat_rule.c delete mode 100644 net/ipv4/netfilter/nf_nat_standalone.c create mode 100644 net/netfilter/nf_nat_core.c create mode 100644 net/netfilter/nf_nat_helper.c create mode 100644 net/netfilter/nf_nat_proto_common.c create mode 100644 net/netfilter/nf_nat_proto_dccp.c create mode 100644 net/netfilter/nf_nat_proto_sctp.c create mode 100644 net/netfilter/nf_nat_proto_tcp.c create mode 100644 net/netfilter/nf_nat_proto_udp.c create mode 100644 net/netfilter/nf_nat_proto_udplite.c create mode 100644 net/netfilter/nf_nat_proto_unknown.c create mode 100644 net/netfilter/xt_nat.c (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ed1b36783192..f1643c0c3587 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) } EXPORT_SYMBOL(ip_route_me_harder); -#ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff *skb) -{ - struct flowi fl; - unsigned int hh_len; - struct dst_entry *dst; - - if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) - return 0; - if (xfrm_decode_session(skb, &fl, AF_INET) < 0) - return -1; - - dst = skb_dst(skb); - if (dst->xfrm) - dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); - - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); - if (IS_ERR(dst)) - return -1; - - skb_dst_drop(skb); - skb_dst_set(skb, dst); - - /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; - return 0; -} -EXPORT_SYMBOL(ip_xfrm_me_harder); -#endif - -void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); -EXPORT_SYMBOL(ip_nat_decode_session); - /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987a..b26629681bdb 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG To compile it as a module, choose M here. If unsure, say N. # NAT + specific targets: nf_conntrack -config NF_NAT - tristate "Full NAT" +config NF_NAT_IPV4 + tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n + select NF_NAT help - The Full NAT option allows masquerading, port forwarding and other + The IPv4 NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by the `nat' table in iptables: see the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y +if NF_NAT_IPV4 config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - depends on NF_NAT default m if NETFILTER_ADVANCED=n help Masquerading is a special case of NAT: all outgoing connections are @@ -174,7 +171,6 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_NETMAP tristate "NETMAP target support" - depends on NF_NAT depends on NETFILTER_ADVANCED help NETMAP is an implementation of static 1:1 NAT mapping of network @@ -185,7 +181,6 @@ config IP_NF_TARGET_NETMAP config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" - depends on NF_NAT depends on NETFILTER_ADVANCED help REDIRECT is a special case of NAT: all incoming connections are @@ -195,9 +190,11 @@ config IP_NF_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +endif + config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" - depends on NF_CONNTRACK_SNMP && NF_NAT + depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP ---help--- @@ -219,61 +216,46 @@ config NF_NAT_SNMP_BASIC # '&&' (6) # # (6) Returns the result of min(/expr/, /expr/). -config NF_NAT_PROTO_DCCP - tristate - depends on NF_NAT && NF_CT_PROTO_DCCP - default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_GRE tristate - depends on NF_NAT && NF_CT_PROTO_GRE - -config NF_NAT_PROTO_UDPLITE - tristate - depends on NF_NAT && NF_CT_PROTO_UDPLITE - default NF_NAT && NF_CT_PROTO_UDPLITE - -config NF_NAT_PROTO_SCTP - tristate - default NF_NAT && NF_CT_PROTO_SCTP - depends on NF_NAT && NF_CT_PROTO_SCTP - select LIBCRC32C + depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE config NF_NAT_FTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_FTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_IRC + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_TFTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_AMANDA + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_PPTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_H323 + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_SIP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c20674dc9452..0ea3acc510e2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -10,13 +10,11 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o endif endif -nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o -iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o - # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o -obj-$(CONFIG_NF_NAT) += nf_nat.o +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o @@ -32,10 +30,7 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -43,7 +38,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_NF_NAT) += iptable_nat.o +obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index cbb6a1a6f6f7..1c3aa28b51ae 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -19,9 +19,9 @@ #include #include #include -#include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; __be32 newsrc, nh; @@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newsrc, newsrc, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newsrc; + newrange.max_addr.ip = newsrc; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index b5bfbbabf70d..85028dc0425d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen "); @@ -44,7 +44,7 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || @@ -61,10 +61,13 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - new_ip, new_ip, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = new_ip; + newrange.max_addr.ip = new_ip; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 7c0103a5203e..11407d7d2472 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -48,7 +48,7 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; __be32 newdst; const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -76,10 +76,13 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) } /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newdst, newdst, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c new file mode 100644 index 000000000000..9e0ffaf1d942 --- /dev/null +++ b/net/ipv4/netfilter/iptable_nat.c @@ -0,0 +1,320 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv4_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + * and local-out, and nf_nat_out protects post-routing. + */ + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv4_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + __be32 daddr = ip_hdr(skb)->daddr; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv4_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv4_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +static int __net_init iptable_nat_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + kfree(repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.nat_table); +} + +static struct pernet_operations iptable_nat_net_ops = { + .init = iptable_nat_net_init, + .exit = iptable_nat_net_exit, +}; + +static int __init iptable_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&iptable_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&iptable_nat_net_ops); +err1: + return err; +} + +static void __exit iptable_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + unregister_pernet_subsys(&iptable_nat_net_ops); +} + +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4ada3295d9a7..fcdd0c2406e6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -29,12 +29,6 @@ #include #include -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 75464b62f5f2..42d337881171 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -16,7 +16,6 @@ #include #include #include -#include #include MODULE_AUTHOR("Brian J. Murrell "); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c deleted file mode 100644 index 44b082fd48ab..000000000000 --- a/net/ipv4/netfilter/nf_nat_core.c +++ /dev/null @@ -1,763 +0,0 @@ -/* NAT for netfilter; shared with compatibility layer. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* For tcp_prot in getorigdst */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(nf_nat_lock); - -static struct nf_conntrack_l3proto *l3proto __read_mostly; - -#define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] - __read_mostly; - -static inline const struct nf_nat_protocol * -__nf_nat_proto_find(u_int8_t protonum) -{ - return rcu_dereference(nf_nat_protos[protonum]); -} - -/* We keep an extra hash for each conntrack, for fast searching. */ -static inline unsigned int -hash_by_src(const struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) -{ - unsigned int hash; - - /* Original src, to ensure we map it consistently if poss. */ - hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all ^ zone, - tuple->dst.protonum, nf_conntrack_hash_rnd); - return ((u64)hash * net->ipv4.nat_htable_size) >> 32; -} - -/* Is this tuple already taken? (not by us) */ -int -nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) -{ - /* Conntrack tracking doesn't keep track of outgoing tuples; only - incoming ones. NAT means they don't have a fixed mapping, - so we invert the tuple and look for the incoming reply. - - We could keep a separate hash if this proves too slow. */ - struct nf_conntrack_tuple reply; - - nf_ct_invert_tuplepr(&reply, tuple); - return nf_conntrack_tuple_taken(&reply, ignored_conntrack); -} -EXPORT_SYMBOL(nf_nat_used_tuple); - -/* If we source map this tuple so reply looks like reply_tuple, will - * that meet the constraints of range. */ -static int -in_range(const struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range) -{ - const struct nf_nat_protocol *proto; - int ret = 0; - - /* If we are supposed to map IPs, then we must be in the - range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & NF_NAT_RANGE_MAP_IPS) { - if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || - ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) - return 0; - } - - rcu_read_lock(); - proto = __nf_nat_proto_find(tuple->dst.protonum); - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || - proto->in_range(tuple, NF_NAT_MANIP_SRC, - &range->min, &range->max)) - ret = 1; - rcu_read_unlock(); - - return ret; -} - -static inline int -same_src(const struct nf_conn *ct, - const struct nf_conntrack_tuple *tuple) -{ - const struct nf_conntrack_tuple *t; - - t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - return (t->dst.protonum == tuple->dst.protonum && - t->src.u3.ip == tuple->src.u3.ip && - t->src.u.all == tuple->src.u.all); -} - -/* Only called for SRC manip */ -static int -find_appropriate_src(struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_tuple *result, - const struct nf_nat_ipv4_range *range) -{ - unsigned int h = hash_by_src(net, zone, tuple); - const struct nf_conn_nat *nat; - const struct nf_conn *ct; - const struct hlist_node *n; - - rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { - ct = nat->ct; - if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { - /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(result, range)) { - rcu_read_unlock(); - return 1; - } - } - } - rcu_read_unlock(); - return 0; -} - -/* For [FUTURE] fragmentation handling, we want the least-used - src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus - if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports - 1-65535, we don't do pro-rata allocation based on ports; we choose - the ip with the lowest src-ip/dst-ip/proto usage. -*/ -static void -find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - const struct nf_conn *ct, - enum nf_nat_manip_type maniptype) -{ - __be32 *var_ipp; - /* Host order */ - u_int32_t minip, maxip, j; - - /* No IP mapping? Do nothing. */ - if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) - return; - - if (maniptype == NF_NAT_MANIP_SRC) - var_ipp = &tuple->src.u3.ip; - else - var_ipp = &tuple->dst.u3.ip; - - /* Fast path: only one choice. */ - if (range->min_ip == range->max_ip) { - *var_ipp = range->min_ip; - return; - } - - /* Hashing source and destination IPs gives a fairly even - * spread in practice (if there are a small number of IPs - * involved, there usually aren't that many connections - * anyway). The consistency means that servers see the same - * client coming from the same IP (some Internet Banking sites - * like this), even across reboots. */ - minip = ntohl(range->min_ip); - maxip = ntohl(range->max_ip); - j = jhash_2words((__force u32)tuple->src.u3.ip, - range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); - j = ((u64)j * (maxip - minip + 1)) >> 32; - *var_ipp = htonl(minip + j); -} - -/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, - * we change the source to map into the range. For NF_INET_PRE_ROUTING - * and NF_INET_LOCAL_OUT, we change the destination to map into the - * range. It might not be possible to get a unique tuple, but we try. - * At worst (or if we race), we will end up with a final duplicate in - * __ip_conntrack_confirm and drop the packet. */ -static void -get_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig_tuple, - const struct nf_nat_ipv4_range *range, - struct nf_conn *ct, - enum nf_nat_manip_type maniptype) -{ - struct net *net = nf_ct_net(ct); - const struct nf_nat_protocol *proto; - u16 zone = nf_ct_zone(ct); - - /* 1) If this srcip/proto/src-proto-part is currently mapped, - and that same mapping gives a unique tuple within the given - range, use that. - - This is only required for source (ie. NAT/masq) mappings. - So far, we don't do local source mappings, so multiple - manips not an issue. */ - if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { - /* try the original tuple first */ - if (in_range(orig_tuple, range)) { - if (!nf_nat_used_tuple(orig_tuple, ct)) { - *tuple = *orig_tuple; - return; - } - } else if (find_appropriate_src(net, zone, orig_tuple, tuple, - range)) { - pr_debug("get_unique_tuple: Found current src map\n"); - if (!nf_nat_used_tuple(tuple, ct)) - return; - } - } - - /* 2) Select the least-used IP/proto combination in the given - range. */ - *tuple = *orig_tuple; - find_best_ips_proto(zone, tuple, range, ct, maniptype); - - /* 3) The per-protocol part of the manip is made to map into - the range to make a unique tuple. */ - - rcu_read_lock(); - proto = __nf_nat_proto_find(orig_tuple->dst.protonum); - - /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { - if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { - if (proto->in_range(tuple, maniptype, &range->min, - &range->max) && - (range->min.all == range->max.all || - !nf_nat_used_tuple(tuple, ct))) - goto out; - } else if (!nf_nat_used_tuple(tuple, ct)) { - goto out; - } - } - - /* Last change: get protocol to try to obtain unique tuple. */ - proto->unique_tuple(tuple, range, maniptype, ct); -out: - rcu_read_unlock(); -} - -unsigned int -nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype) -{ - struct net *net = nf_ct_net(ct); - struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat; - - /* nat helper or nfctnetlink also setup binding */ - nat = nfct_nat(ct); - if (!nat) { - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } - - NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || - maniptype == NF_NAT_MANIP_DST); - BUG_ON(nf_nat_initialized(ct, maniptype)); - - /* What we've got will look like inverse of reply. Normally - this is what is in the conntrack, except for prior - manipulations (future optimization: if num_manips == 0, - orig_tp = - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - nf_ct_invert_tuplepr(&curr_tuple, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); - - if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { - struct nf_conntrack_tuple reply; - - /* Alter conntrack table so will recognize replies. */ - nf_ct_invert_tuplepr(&reply, &new_tuple); - nf_conntrack_alter_reply(ct, &reply); - - /* Non-atomic: we own this at the moment. */ - if (maniptype == NF_NAT_MANIP_SRC) - ct->status |= IPS_SRC_NAT; - else - ct->status |= IPS_DST_NAT; - } - - if (maniptype == NF_NAT_MANIP_SRC) { - unsigned int srchash; - - srchash = hash_by_src(net, nf_ct_zone(ct), - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate extension area */ - nat = nfct_nat(ct); - nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, - &net->ipv4.nat_bysource[srchash]); - spin_unlock_bh(&nf_nat_lock); - } - - /* It's done. */ - if (maniptype == NF_NAT_MANIP_DST) - ct->status |= IPS_DST_NAT_DONE; - else - ct->status |= IPS_SRC_NAT_DONE; - - return NF_ACCEPT; -} -EXPORT_SYMBOL(nf_nat_setup_info); - -/* Returns true if succeeded. */ -static bool -manip_pkt(u_int16_t proto, - struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct iphdr *iph; - const struct nf_nat_protocol *p; - - if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) - return false; - - iph = (void *)skb->data + iphdroff; - - /* Manipulate protcol part. */ - - /* rcu_read_lock()ed by nf_hook_slow */ - p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(skb, iphdroff, target, maniptype)) - return false; - - iph = (void *)skb->data + iphdroff; - - if (maniptype == NF_NAT_MANIP_SRC) { - csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); - iph->saddr = target->src.u3.ip; - } else { - csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); - iph->daddr = target->dst.u3.ip; - } - return true; -} - -/* Do packet manipulations according to nf_nat_setup_info. */ -unsigned int nf_nat_packet(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); - - if (mtype == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - /* Non-atomic: these bits don't change. */ - if (ct->status & statusbit) { - struct nf_conntrack_tuple target; - - /* We are aiming to look like inverse of other direction. */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - - if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) - return NF_DROP; - } - return NF_ACCEPT; -} -EXPORT_SYMBOL_GPL(nf_nat_packet); - -/* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int nf_nat_icmp_reply_translation(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb) -{ - struct { - struct icmphdr icmp; - struct iphdr ip; - } *inside; - struct nf_conntrack_tuple target; - int hdrlen = ip_hdrlen(skb); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - - inside = (void *)skb->data + hdrlen; - - /* We're actually going to mangle it beyond trivial checksum - adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) - return 0; - - /* Must be RELATED */ - NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED_REPLY); - - /* Redirects on non-null nats must be dropped, else they'll - start talking to each other without our translation, and be - confused... --RR */ - if (inside->icmp.type == ICMP_REDIRECT) { - /* If NAT isn't finished, assume it and drop. */ - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", skb, manip, - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - - /* Change inner back to look like incoming packet. We do the - opposite manip on this hook to normal, because it might not - pass all hooks (locally-generated ICMP). Consider incoming - packet: PREROUTING (DST manip), routing produces ICMP, goes - through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + hdrlen; - inside->icmp.checksum = 0; - inside->icmp.checksum = - csum_fold(skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - /* Change outer to look the reply to an incoming packet - * (proto 0 means don't invert per-proto part). */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); - -/* Protocol registration. */ -int nf_nat_protocol_register(const struct nf_nat_protocol *proto) -{ - int ret = 0; - - spin_lock_bh(&nf_nat_lock); - if (rcu_dereference_protected( - nf_nat_protos[proto->protonum], - lockdep_is_held(&nf_nat_lock) - ) != &nf_nat_unknown_protocol) { - ret = -EBUSY; - goto out; - } - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); - out: - spin_unlock_bh(&nf_nat_lock); - return ret; -} -EXPORT_SYMBOL(nf_nat_protocol_register); - -/* No one stores the protocol anywhere; simply delete it. */ -void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) -{ - spin_lock_bh(&nf_nat_lock); - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], - &nf_nat_unknown_protocol); - spin_unlock_bh(&nf_nat_lock); - synchronize_rcu(); -} -EXPORT_SYMBOL(nf_nat_protocol_unregister); - -/* No one using conntrack by the time this called. */ -static void nf_nat_cleanup_conntrack(struct nf_conn *ct) -{ - struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - - if (nat == NULL || nat->ct == NULL) - return; - - NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static void nf_nat_move_storage(void *new, void *old) -{ - struct nf_conn_nat *new_nat = new; - struct nf_conn_nat *old_nat = old; - struct nf_conn *ct = old_nat->ct; - - if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) - return; - - spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static struct nf_ct_ext_type nat_extend __read_mostly = { - .len = sizeof(struct nf_conn_nat), - .align = __alignof__(struct nf_conn_nat), - .destroy = nf_nat_cleanup_conntrack, - .move = nf_nat_move_storage, - .id = NF_CT_EXT_NAT, - .flags = NF_CT_EXT_F_PREALLOC, -}; - -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - -#include -#include - -static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { - [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, - [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, -}; - -static int nfnetlink_parse_nat_proto(struct nlattr *attr, - const struct nf_conn *ct, - struct nf_nat_ipv4_range *range) -{ - struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_protocol *npt; - int err; - - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); - if (err < 0) - return err; - - rcu_read_lock(); - npt = __nf_nat_proto_find(nf_ct_protonum(ct)); - if (npt->nlattr_to_range) - err = npt->nlattr_to_range(tb, range); - rcu_read_unlock(); - return err; -} - -static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { - [CTA_NAT_MINIP] = { .type = NLA_U32 }, - [CTA_NAT_MAXIP] = { .type = NLA_U32 }, - [CTA_NAT_PROTO] = { .type = NLA_NESTED }, -}; - -static int -nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_ipv4_range *range) -{ - struct nlattr *tb[CTA_NAT_MAX+1]; - int err; - - memset(range, 0, sizeof(*range)); - - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); - if (err < 0) - return err; - - if (tb[CTA_NAT_MINIP]) - range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); - - if (!tb[CTA_NAT_MAXIP]) - range->max_ip = range->min_ip; - else - range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); - - if (range->min_ip) - range->flags |= NF_NAT_RANGE_MAP_IPS; - - if (!tb[CTA_NAT_PROTO]) - return 0; - - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); - if (err < 0) - return err; - - return 0; -} - -static int -nfnetlink_parse_nat_setup(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr) -{ - struct nf_nat_ipv4_range range; - - if (nfnetlink_parse_nat(attr, ct, &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, manip)) - return -EEXIST; - - return nf_nat_setup_info(ct, &range, manip); -} -#else -static int -nfnetlink_parse_nat_setup(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr) -{ - return -EOPNOTSUPP; -} -#endif - -static int __net_init nf_nat_net_init(struct net *net) -{ - /* Leave them the same for the moment. */ - net->ipv4.nat_htable_size = net->ct.htable_size; - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); - if (!net->ipv4.nat_bysource) - return -ENOMEM; - return 0; -} - -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - -static void __net_exit nf_nat_net_exit(struct net *net) -{ - nf_ct_iterate_cleanup(net, &clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); -} - -static struct pernet_operations nf_nat_net_ops = { - .init = nf_nat_net_init, - .exit = nf_nat_net_exit, -}; - -static struct nf_ct_helper_expectfn follow_master_nat = { - .name = "nat-follow-master", - .expectfn = nf_nat_follow_master, -}; - -static struct nfq_ct_nat_hook nfq_ct_nat = { - .seq_adjust = nf_nat_tcp_seq_adjust, -}; - -static int __init nf_nat_init(void) -{ - size_t i; - int ret; - - need_ipv4_conntrack(); - - ret = nf_ct_extend_register(&nat_extend); - if (ret < 0) { - printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); - return ret; - } - - ret = register_pernet_subsys(&nf_nat_net_ops); - if (ret < 0) - goto cleanup_extend; - - /* Sew in builtin protocols. */ - spin_lock_bh(&nf_nat_lock); - for (i = 0; i < MAX_IP_NAT_PROTO; i++) - RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); - spin_unlock_bh(&nf_nat_lock); - - /* Initialize fake conntrack so that NAT will skip it */ - nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - - l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); - - nf_ct_helper_expectfn_register(&follow_master_nat); - - BUG_ON(nf_nat_seq_adjust_hook != NULL); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, - nfnetlink_parse_nat_setup); - BUG_ON(nf_ct_nat_offset != NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); - RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); - return 0; - - cleanup_extend: - nf_ct_extend_unregister(&nat_extend); - return ret; -} - -static void __exit nf_nat_cleanup(void) -{ - unregister_pernet_subsys(&nf_nat_net_ops); - nf_ct_l3proto_put(l3proto); - nf_ct_extend_unregister(&nat_extend); - nf_ct_helper_expectfn_unregister(&follow_master_nat); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); - synchronize_net(); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-ipv4"); - -module_init(nf_nat_init); -module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 5589f3af4a8e..dd5e387fc03b 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index d2c228db38b5..9c3db10b22d3 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -392,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ nf_nat_follow_master(new, this); @@ -404,14 +403,15 @@ static void ip_nat_q931_expect(struct nf_conn *new, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = - new->master->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } @@ -490,20 +490,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_callforwarding_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = this->saved_ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } @@ -519,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, u_int16_t nated_port; /* Set expectations for NAT */ - exp->saved_ip = exp->tuple.dst.u3.ip; + exp->saved_addr = exp->tuple.dst.u3; exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->expectfn = ip_nat_callforwarding_expect; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c deleted file mode 100644 index 2fefec5e757c..000000000000 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ /dev/null @@ -1,461 +0,0 @@ -/* ip_nat_helper.c - generic support functions for NAT helpers - * - * (C) 2000-2002 Harald Welte - * (C) 2003-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DUMP_OFFSET(x) \ - pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ - x->offset_before, x->offset_after, x->correction_pos); - -static DEFINE_SPINLOCK(nf_nat_seqofs_lock); - -/* Setup TCP sequence correction given this change at this sequence */ -static inline void -adjust_tcp_sequence(u32 seq, - int sizediff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way = &nat->seq[dir]; - - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", - seq, sizediff); - - pr_debug("adjust_tcp_sequence: Seq_offset before: "); - DUMP_OFFSET(this_way); - - spin_lock_bh(&nf_nat_seqofs_lock); - - /* SYN adjust. If it's uninitialized, or this is after last - * correction, record it: we don't handle more than one - * adjustment in the window, but do deal with common case of a - * retransmit */ - if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; - } - spin_unlock_bh(&nf_nat_seqofs_lock); - - pr_debug("adjust_tcp_sequence: Seq_offset after: "); - DUMP_OFFSET(this_way); -} - -/* Get the offset value, for conntrack */ -s16 nf_nat_get_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way; - s16 offset; - - if (!nat) - return 0; - - this_way = &nat->seq[dir]; - spin_lock_bh(&nf_nat_seqofs_lock); - offset = after(seq, this_way->correction_pos) - ? this_way->offset_after : this_way->offset_before; - spin_unlock_bh(&nf_nat_seqofs_lock); - - return offset; -} -EXPORT_SYMBOL_GPL(nf_nat_get_offset); - -/* Frobs data inside this packet, which is linear. */ -static void mangle_contents(struct sk_buff *skb, - unsigned int dataoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - unsigned char *data; - - BUG_ON(skb_is_nonlinear(skb)); - data = skb_network_header(skb) + dataoff; - - /* move post-replacement */ - memmove(data + match_offset + rep_len, - data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + - match_offset + match_len)); - - /* insert data from buffer */ - memcpy(data + match_offset, rep_buffer, rep_len); - - /* update skb info */ - if (rep_len > match_len) { - pr_debug("nf_nat_mangle_packet: Extending packet by " - "%u from %u bytes\n", rep_len - match_len, skb->len); - skb_put(skb, rep_len - match_len); - } else { - pr_debug("nf_nat_mangle_packet: Shrinking packet from " - "%u from %u bytes\n", match_len - rep_len, skb->len); - __skb_trim(skb, skb->len + rep_len - match_len); - } - - /* fix IP hdr checksum information */ - ip_hdr(skb)->tot_len = htons(skb->len); - ip_send_check(ip_hdr(skb)); -} - -/* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff *skb, unsigned int extra) -{ - if (skb->len + extra > 65535) - return 0; - - if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) - return 0; - - return 1; -} - -void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - __be32 seq, s16 off) -{ - if (!off) - return; - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); -} -EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); - -void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - u32 ctinfo, int off) -{ - const struct tcphdr *th; - - if (nf_ct_protonum(ct) != IPPROTO_TCP) - return; - - th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); -} -EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); - -static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, - int datalen, __sum16 *check, int oldlen) -{ - struct rtable *rt = skb_rtable(skb); - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - iph->ihl * 4; - skb->csum_offset = (void *)check - data; - *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, 0); - } else { - *check = 0; - *check = csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, - csum_partial(data, datalen, - 0)); - if (iph->protocol == IPPROTO_UDP && !*check) - *check = CSUM_MANGLED_0; - } - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); -} - -/* Generic function for mangling variable-length address changes inside - * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX - * command in FTP). - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len, bool adjust) -{ - struct iphdr *iph; - struct tcphdr *tcph; - int oldlen, datalen; - - if (!skb_make_writable(skb, skb->len)) - return 0; - - if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) - return 0; - - SKB_LINEAR_ASSERT(skb); - - iph = ip_hdr(skb); - tcph = (void *)iph + iph->ihl*4; - - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + tcph->doff*4, - match_offset, match_len, rep_buffer, rep_len); - - datalen = skb->len - iph->ihl*4; - nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen); - - if (adjust && rep_len != match_len) - nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, - (int)rep_len - (int)match_len); - - return 1; -} -EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); - -/* Generic function for mangling variable-length address changes inside - * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX - * command in the Amanda protocol) - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * XXX - This function could be merged with nf_nat_mangle_tcp_packet which - * should be fairly easy to do. - */ -int -nf_nat_mangle_udp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - struct iphdr *iph; - struct udphdr *udph; - int datalen, oldlen; - - if (!skb_make_writable(skb, skb->len)) - return 0; - - if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) - return 0; - - iph = ip_hdr(skb); - udph = (void *)iph + iph->ihl*4; - - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + sizeof(*udph), - match_offset, match_len, rep_buffer, rep_len); - - /* update the length of the UDP packet */ - datalen = skb->len - iph->ihl*4; - udph->len = htons(datalen); - - /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) - return 1; - - nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen); - - return 1; -} -EXPORT_SYMBOL(nf_nat_mangle_udp_packet); - -/* Adjust one found SACK option including checksum correction */ -static void -sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int sackoff, - unsigned int sackend, - struct nf_nat_seq *natseq) -{ - while (sackoff < sackend) { - struct tcp_sack_block_wire *sack; - __be32 new_start_seq, new_end_seq; - - sack = (void *)skb->data + sackoff; - if (after(ntohl(sack->start_seq) - natseq->offset_before, - natseq->correction_pos)) - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_after); - else - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_before); - - if (after(ntohl(sack->end_seq) - natseq->offset_before, - natseq->correction_pos)) - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_after); - else - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_before); - - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); - - inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); - inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); - sack->start_seq = new_start_seq; - sack->end_seq = new_end_seq; - sackoff += sizeof(*sack); - } -} - -/* TCP SACK sequence number adjustment */ -static inline unsigned int -nf_nat_sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dir, optoff, optend; - struct nf_conn_nat *nat = nfct_nat(ct); - - optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); - optend = ip_hdrlen(skb) + tcph->doff * 4; - - if (!skb_make_writable(skb, optend)) - return 0; - - dir = CTINFO2DIR(ctinfo); - - while (optoff < optend) { - /* Usually: option, length. */ - unsigned char *op = skb->data + optoff; - - switch (op[0]) { - case TCPOPT_EOL: - return 1; - case TCPOPT_NOP: - optoff++; - continue; - default: - /* no partial options */ - if (optoff + 1 == optend || - optoff + op[1] > optend || - op[1] < 2) - return 0; - if (op[0] == TCPOPT_SACK && - op[1] >= 2+TCPOLEN_SACK_PERBLOCK && - ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(skb, tcph, optoff+2, - optoff+op[1], &nat->seq[!dir]); - optoff += op[1]; - } - } - return 1; -} - -/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ -int -nf_nat_seq_adjust(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff) -{ - struct tcphdr *tcph; - int dir; - __be32 newseq, newack; - s16 seqoff, ackoff; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way, *other_way; - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; - - if (!skb_make_writable(skb, protoff + sizeof(*tcph))) - return 0; - - tcph = (void *)skb->data + protoff; - if (after(ntohl(tcph->seq), this_way->correction_pos)) - seqoff = this_way->offset_after; - else - seqoff = this_way->offset_before; - - if (after(ntohl(tcph->ack_seq) - other_way->offset_before, - other_way->correction_pos)) - ackoff = other_way->offset_after; - else - ackoff = other_way->offset_before; - - newseq = htonl(ntohl(tcph->seq) + seqoff); - newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); - - tcph->seq = newseq; - tcph->ack_seq = newack; - - return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); -} - -/* Setup NAT on this expected conntrack so it follows master. */ -/* If we fail to get a free NAT slot, we'll get dropped on confirm */ -void nf_nat_follow_master(struct nf_conn *ct, - struct nf_conntrack_expect *exp) -{ - struct nf_nat_ipv4_range range; - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* Change src to where master sends to */ - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); - - /* For DST manip, map port here to where it's expected. */ - range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); -} -EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 5b0c20a1f08d..1ce37f89ec78 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 000000000000..d8b2e14efddc --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -0,0 +1,281 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} +#endif /* CONFIG_XFRM */ + +static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); +} + +static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); +} + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, + target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, 1); +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { + .l3proto = NFPROTO_IPV4, + .in_range = nf_nat_ipv4_in_range, + .secure_port = nf_nat_ipv4_secure_port, + .manip_pkt = nf_nat_ipv4_manip_pkt, + .csum_update = nf_nat_ipv4_csum_update, + .csum_recalc = nf_nat_ipv4_csum_recalc, + .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv4_decode_session, +#endif +}; + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); + if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +static int __init nf_nat_l3proto_ipv4_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv4_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); + +module_init(nf_nat_l3proto_ipv4_init); +module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 31ef890d894b..a06d7d74817d 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; - struct nf_nat_ipv4_range range; + struct nf_nat_range range; ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; @@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; if (exp->dir == IP_CT_DIR_ORIGINAL) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; if (exp->dir == IP_CT_DIR_REPLY) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c deleted file mode 100644 index 9993bc93e102..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ /dev/null @@ -1,114 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - __be16 port; - - if (maniptype == NF_NAT_MANIP_SRC) - port = tuple->src.u.all; - else - port = tuple->dst.u.all; - - return ntohs(port) >= ntohs(min->all) && - ntohs(port) <= ntohs(max->all); -} -EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); - -void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u_int16_t *rover) -{ - unsigned int range_size, min, i; - __be16 *portptr; - u_int16_t off; - - if (maniptype == NF_NAT_MANIP_SRC) - portptr = &tuple->src.u.all; - else - portptr = &tuple->dst.u.all; - - /* If no range specified... */ - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == NF_NAT_MANIP_DST) - return; - - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr) < 512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min.all); - range_size = ntohs(range->max.all) - min + 1; - } - - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) - off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, - maniptype == NF_NAT_MANIP_SRC - ? tuple->dst.u.all - : tuple->src.u.all); - else - off = *rover; - - for (i = 0; ; ++off) { - *portptr = htons(min + off % range_size); - if (++i != range_size && nf_nat_used_tuple(tuple, ct)) - continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) - *rover = off; - return; - } - return; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); - -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) -int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_ipv4_range *range) -{ - if (tb[CTA_PROTONAT_PORT_MIN]) { - range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); - range->max.all = range->min.tcp.port; - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - if (tb[CTA_PROTONAT_PORT_MAX]) { - range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); -#endif diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c deleted file mode 100644 index 3f67138d187c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * DCCP NAT protocol helper - * - * Copyright (c) 2005, 2006. 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u_int16_t dccp_port_rover; - -static void -dccp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); -} - -static bool -dccp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (const void *)(skb->data + iphdroff); - struct dccp_hdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl * 4; - __be32 oldip, newip; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_dccp = { - .protonum = IPPROTO_DCCP, - .manip_pkt = dccp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = dccp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_dccp_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_dccp); -} - -static void __exit nf_nat_proto_dccp_fini(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_dccp); -} - -module_init(nf_nat_proto_dccp_init); -module_exit(nf_nat_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("DCCP NAT protocol helper"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 46ba0b9ab985..ea44f02563b5 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -28,8 +28,7 @@ #include #include -#include -#include +#include #include MODULE_LICENSE("GPL"); @@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ static void -gre_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +gre_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, min = 1; range_size = 0xffff; } else { - min = ntohs(range->min.gre.key); - range_size = ntohs(range->max.gre.key) - min + 1; + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; } pr_debug("min = %u, range_size = %u\n", min, range_size); @@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static bool -gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { const struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, return true; } -static const struct nf_nat_protocol gre = { - .protonum = IPPROTO_GRE, +static const struct nf_nat_l4proto gre = { + .l4proto = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_gre_init(void) { - return nf_nat_protocol_register(&gre); + return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); } static void __exit nf_nat_proto_gre_fini(void) { - nf_nat_protocol_unregister(&gre); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); } module_init(nf_nat_proto_gre_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index b35172851bae..eb303471bcf6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include static bool icmp_in_range(const struct nf_conntrack_tuple *tuple, @@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, } static void -icmp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, unsigned int range_size; unsigned int i; - range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; /* If no range specified... */ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; for (i = 0; ; ++id) { - tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + (id % range_size)); if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) return; @@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, static bool icmp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb, return true; } -const struct nf_nat_protocol nf_nat_protocol_icmp = { - .protonum = IPPROTO_ICMP, +const struct nf_nat_l4proto nf_nat_l4proto_icmp = { + .l4proto = IPPROTO_ICMP, .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c deleted file mode 100644 index 3cce9b6c1c29..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -static u_int16_t nf_sctp_port_rover; - -static void -sctp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); -} - -static bool -sctp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct sk_buff *frag; - sctp_sctphdr_t *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be32 crc32; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct sctphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - hdr->source = tuple->src.u.sctp.port; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - hdr->dest = tuple->dst.u.sctp.port; - } - - crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); - skb_walk_frags(skb, frag) - crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), - crc32); - crc32 = sctp_end_cksum(crc32); - hdr->checksum = crc32; - - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_sctp = { - .protonum = IPPROTO_SCTP, - .manip_pkt = sctp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = sctp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_sctp_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_sctp); -} - -static void __exit nf_nat_proto_sctp_exit(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_sctp); -} - -module_init(nf_nat_proto_sctp_init); -module_exit(nf_nat_proto_sctp_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SCTP NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c deleted file mode 100644 index 9fb4b4e72bbf..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ /dev/null @@ -1,91 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static u_int16_t tcp_port_rover; - -static void -tcp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); -} - -static bool -tcp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct tcphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport, oldport; - int hdrsize = 8; /* TCP connection tracking guarantees this much */ - - /* this could be a inner header returned in icmp packet; in such - cases we cannot update the checksum field since it is outside of - the 8 bytes of transport layer headers we are guaranteed */ - if (skb->len >= hdroff + sizeof(struct tcphdr)) - hdrsize = sizeof(struct tcphdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct tcphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.tcp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.tcp.port; - portptr = &hdr->dest; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); - return true; -} - -const struct nf_nat_protocol nf_nat_protocol_tcp = { - .protonum = IPPROTO_TCP, - .manip_pkt = tcp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = tcp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c deleted file mode 100644 index 9883336e628f..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static u_int16_t udp_port_rover; - -static void -udp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); -} - -static bool -udp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - } - *portptr = newport; - return true; -} - -const struct nf_nat_protocol nf_nat_protocol_udp = { - .protonum = IPPROTO_UDP, - .manip_pkt = udp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = udp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c deleted file mode 100644 index d24d10a7beb2..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ /dev/null @@ -1,98 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -static u_int16_t udplite_port_rover; - -static void -udplite_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); -} - -static bool -udplite_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - - *portptr = newport; - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_udplite = { - .protonum = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_udplite_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_udplite); -} - -static void __exit nf_nat_proto_udplite_fini(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_udplite); -} - -module_init(nf_nat_proto_udplite_init); -module_exit(nf_nat_proto_udplite_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c deleted file mode 100644 index e0afe8112b1c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ /dev/null @@ -1,52 +0,0 @@ -/* The "unknown" protocol. This is what is used for protocols we - * don't understand. It's returned by ip_ct_find_proto(). - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include -#include - -static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type manip_type, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return true; -} - -static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - /* Sorry: we can't help you; if it's not unique, we can't frob - anything. */ - return; -} - -static bool -unknown_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - return true; -} - -const struct nf_nat_protocol nf_nat_unknown_protocol = { - .manip_pkt = unknown_manip_pkt, - .in_range = unknown_in_range, - .unique_tuple = unknown_unique_tuple, -}; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c deleted file mode 100644 index d2a9dc314e0e..000000000000 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ /dev/null @@ -1,214 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Everything about the rules for NAT. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ - (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT) | \ - (1 << NF_INET_LOCAL_IN)) - -static const struct xt_table nat_table = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .me = THIS_MODULE, - .af = NFPROTO_IPV4, -}; - -/* Source NAT */ -static unsigned int -ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_IN); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); - NF_CT_ASSERT(par->out != NULL); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); -} - -static unsigned int -ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); -} - -static int ipt_snat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("SNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("DNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static unsigned int -alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) -{ - /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). - */ - struct nf_nat_ipv4_range range; - - range.flags = 0; - pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, - HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); - - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); -} - -int nf_nat_rule_find(struct sk_buff *skb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - int ret; - - ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); - - if (ret == NF_ACCEPT) { - if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) - /* NUL mapping */ - ret = alloc_null_binding(ct, hooknum); - } - return ret; -} - -static struct xt_target ipt_snat_reg __read_mostly = { - .name = "SNAT", - .target = ipt_snat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), - .checkentry = ipt_snat_checkentry, - .family = AF_INET, -}; - -static struct xt_target ipt_dnat_reg __read_mostly = { - .name = "DNAT", - .target = ipt_dnat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = ipt_dnat_checkentry, - .family = AF_INET, -}; - -static int __net_init nf_nat_rule_net_init(struct net *net) -{ - struct ipt_replace *repl; - - repl = ipt_alloc_initial_table(&nat_table); - if (repl == NULL) - return -ENOMEM; - net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); - kfree(repl); - if (IS_ERR(net->ipv4.nat_table)) - return PTR_ERR(net->ipv4.nat_table); - return 0; -} - -static void __net_exit nf_nat_rule_net_exit(struct net *net) -{ - ipt_unregister_table(net, net->ipv4.nat_table); -} - -static struct pernet_operations nf_nat_rule_net_ops = { - .init = nf_nat_rule_net_init, - .exit = nf_nat_rule_net_exit, -}; - -int __init nf_nat_rule_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_nat_rule_net_ops); - if (ret != 0) - goto out; - ret = xt_register_target(&ipt_snat_reg); - if (ret != 0) - goto unregister_table; - - ret = xt_register_target(&ipt_dnat_reg); - if (ret != 0) - goto unregister_snat; - - return ret; - - unregister_snat: - xt_unregister_target(&ipt_snat_reg); - unregister_table: - unregister_pernet_subsys(&nf_nat_rule_net_ops); - out: - return ret; -} - -void nf_nat_rule_cleanup(void) -{ - xt_unregister_target(&ipt_dnat_reg); - xt_unregister_target(&ipt_snat_reg); - unregister_pernet_subsys(&nf_nat_rule_net_ops); -} diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index df626af8413c..47a47186a791 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -255,15 +254,15 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip = exp->saved_ip; + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr = exp->saved_addr; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); /* Change src to where master sends to, but only if the connection @@ -271,8 +270,8 @@ static void ip_nat_sip_expected(struct nf_conn *ct, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } } @@ -307,7 +306,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, else port = ntohs(exp->tuple.dst.u.udp.port); - exp->saved_ip = exp->tuple.dst.u3.ip; + exp->saved_addr = exp->tuple.dst.u3; exp->tuple.dst.u3.ip = newip; exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; exp->dir = !dir; @@ -329,7 +328,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, if (port == 0) return NF_DROP; - if (exp->tuple.dst.u3.ip != exp->saved_ip || + if (exp->tuple.dst.u3.ip != exp->saved_addr.ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, @@ -485,13 +484,13 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, else rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; - rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; + rtp_exp->saved_addr = rtp_exp->tuple.dst.u3; rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; rtp_exp->dir = !dir; rtp_exp->expectfn = ip_nat_sip_expected; - rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; + rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3; rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; rtcp_exp->dir = !dir; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c deleted file mode 100644 index 3828a4229822..000000000000 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ /dev/null @@ -1,326 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_XFRM -static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) -{ - struct flowi4 *fl4 = &fl->u.ip4; - const struct nf_conn *ct; - const struct nf_conntrack_tuple *t; - enum ip_conntrack_info ctinfo; - enum ip_conntrack_dir dir; - unsigned long statusbit; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return; - dir = CTINFO2DIR(ctinfo); - t = &ct->tuplehash[dir].tuple; - - if (dir == IP_CT_DIR_ORIGINAL) - statusbit = IPS_DST_NAT; - else - statusbit = IPS_SRC_NAT; - - if (ct->status & statusbit) { - fl4->daddr = t->dst.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_dport = t->dst.u.tcp.port; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl4->saddr = t->src.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_sport = t->src.u.tcp.port; - } -} -#endif - -static unsigned int -nf_nat_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); - - /* We never see fragments: conntrack defrags on pre-routing - and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - have dropped it. Hence it's the user's responsibilty to - packet filter it out, or implement conntrack/NAT for that - protocol. 8) --RR */ - if (!ct) - return NF_ACCEPT; - - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, skb)) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ - case IP_CT_NEW: - - /* Seen it before? This can happen for loopback, retrans, - or local packets.. */ - if (!nf_nat_initialized(ct, maniptype)) { - unsigned int ret; - - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); - if (ret != NF_ACCEPT) - return ret; - } else - pr_debug("Already setup manip %s for ct %p\n", - maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", - ct); - break; - - default: - /* ESTABLISHED */ - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == IP_CT_ESTABLISHED_REPLY); - } - - return nf_nat_packet(ct, ctinfo, hooknum, skb); -} - -static unsigned int -nf_nat_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - unsigned int ret; - __be32 daddr = ip_hdr(skb)->daddr; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(skb)->daddr) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_out(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; -#endif - unsigned int ret; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if ((ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip) || - (ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all) - ) - return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; - } -#endif - return ret; -} - -static unsigned int -nf_nat_local_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.dst.u3.ip != - ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; - } -#ifdef CONFIG_XFRM - else if (ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(skb)) - ret = NF_DROP; -#endif - } - return ret; -} - -/* We must be after connection tracking and before packet filtering. */ - -static struct nf_hook_ops nf_nat_ops[] __read_mostly = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_in, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_out, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_local_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, - }, -}; - -static int __init nf_nat_standalone_init(void) -{ - int ret = 0; - - need_ipv4_conntrack(); - -#ifdef CONFIG_XFRM - BUG_ON(ip_nat_decode_session != NULL); - RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); -#endif - ret = nf_nat_rule_init(); - if (ret < 0) { - pr_err("nf_nat_init: can't setup rules.\n"); - goto cleanup_decode_session; - } - ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - if (ret < 0) { - pr_err("nf_nat_init: can't register hooks.\n"); - goto cleanup_rule_init; - } - return ret; - - cleanup_rule_init: - nf_nat_rule_cleanup(); - cleanup_decode_session: -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - return ret; -} - -static void __exit nf_nat_standalone_fini(void) -{ - nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - nf_nat_rule_cleanup(); -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - /* Conntrack caches are unregistered in nf_conntrack_cleanup */ -} - -module_init(nf_nat_standalone_init); -module_exit(nf_nat_standalone_fini); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat"); diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 9dbb8d284f99..ccabbda71a3e 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include MODULE_AUTHOR("Magnus Boden "); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c19b214ffd57..91adddae20a4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -356,6 +356,30 @@ config NETFILTER_NETLINK_QUEUE_CT If this option is enabled, NFQUEUE can include Connection Tracking information together with the packet is the enqueued via NFNETLINK. +config NF_NAT + tristate + +config NF_NAT_NEEDED + bool + depends on NF_NAT + default y + +config NF_NAT_PROTO_DCCP + tristate + depends on NF_NAT && NF_CT_PROTO_DCCP + default NF_NAT && NF_CT_PROTO_DCCP + +config NF_NAT_PROTO_UDPLITE + tristate + depends on NF_NAT && NF_CT_PROTO_UDPLITE + default NF_NAT && NF_CT_PROTO_UDPLITE + +config NF_NAT_PROTO_SCTP + tristate + default NF_NAT && NF_CT_PROTO_SCTP + depends on NF_NAT && NF_CT_PROTO_SCTP + select LIBCRC32C + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1c5160f2278e..09c9451bc510 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -43,6 +43,17 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ + nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o + +obj-$(CONFIG_NF_NAT) += nf_nat.o +obj-$(CONFIG_NF_NAT) += xt_nat.o + +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o +obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o + # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8f4b0b2b6f80..e61b3ac9591b 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -275,6 +275,11 @@ EXPORT_SYMBOL_GPL(nfq_ct_nat_hook); #endif /* CONFIG_NF_CONNTRACK */ +#ifdef CONFIG_NF_NAT_NEEDED +void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(nf_nat_decode_session_hook); +#endif + #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_netfilter; EXPORT_SYMBOL(proc_net_netfilter); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf4875565d67..f83e79defed9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -55,6 +55,12 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); +int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff); +EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); + DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da4fc37a8578..966f5133a384 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,7 @@ #include #ifdef CONFIG_NF_NAT_NEEDED #include -#include +#include #include #endif @@ -1096,13 +1096,14 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + int err; parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(); - if (request_module("nf-nat-ipv4") < 0) { + if (request_module("nf-nat") < 0) { nfnl_lock(); rcu_read_lock(); return -EOPNOTSUPP; @@ -1115,7 +1116,26 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, return -EOPNOTSUPP; } - return parse_nat_setup(ct, manip, attr); + err = parse_nat_setup(ct, manip, attr); + if (err == -EAGAIN) { +#ifdef CONFIG_MODULES + rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); + nfnl_unlock(); + if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { + nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); + return -EOPNOTSUPP; + } + nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); +#else + err = -EOPNOTSUPP; +#endif + } + return err; } #endif @@ -1979,6 +1999,8 @@ nla_put_failure: return -1; } +static const union nf_inet_addr any_addr; + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2005,7 +2027,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; #ifdef CONFIG_NF_NAT_NEEDED - if (exp->saved_ip || exp->saved_proto.all) { + if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || + exp->saved_proto.all) { nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -2014,7 +2037,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; nat_tuple.src.l3num = nf_ct_l3num(master); - nat_tuple.src.u3.ip = exp->saved_ip; + nat_tuple.src.u3 = exp->saved_addr; nat_tuple.dst.protonum = nf_ct_protonum(master); nat_tuple.src.u = exp->saved_proto; @@ -2410,7 +2433,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, if (err < 0) return err; - exp->saved_ip = nat_tuple.src.u3.ip; + exp->saved_addr = nat_tuple.src.u3; exp->saved_proto = nat_tuple.src.u; exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR])); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a5ac11ebef33..9c2cc716f4a5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -505,10 +505,10 @@ static inline s16 nat_offset(const struct nf_conn *ct, return get_offset != NULL ? get_offset(ct, dir, seq) : 0; } -#define NAT_OFFSET(pf, ct, dir, seq) \ - (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) +#define NAT_OFFSET(ct, dir, seq) \ + (nat_offset(ct, dir, seq)) #else -#define NAT_OFFSET(pf, ct, dir, seq) 0 +#define NAT_OFFSET(ct, dir, seq) 0 #endif static bool tcp_in_window(const struct nf_conn *ct, @@ -541,7 +541,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ - receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 590f0abaab8c..d5174902db37 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -946,11 +946,11 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, break; #ifdef CONFIG_NF_NAT_NEEDED if (exp->tuple.src.l3num == AF_INET && !direct_rtp && - (exp->saved_ip != exp->tuple.dst.u3.ip || + (exp->saved_addr.ip != exp->tuple.dst.u3.ip || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && ct->status & IPS_NAT_MASK) { - daddr->ip = exp->saved_ip; - tuple.dst.u3.ip = exp->saved_ip; + daddr->ip = exp->saved_addr.ip; + tuple.dst.u3.ip = exp->saved_addr.ip; tuple.dst.u.udp.port = exp->saved_proto.udp.port; direct_rtp = 1; } else diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c new file mode 100644 index 000000000000..c577b753fb9a --- /dev/null +++ b/net/netfilter/nf_nat_core.c @@ -0,0 +1,854 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(nf_nat_lock); + +static DEFINE_MUTEX(nf_nat_proto_mutex); +static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] + __read_mostly; +static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] + __read_mostly; + + +inline const struct nf_nat_l3proto * +__nf_nat_l3proto_find(u8 family) +{ + return rcu_dereference(nf_nat_l3protos[family]); +} + +inline const struct nf_nat_l4proto * +__nf_nat_l4proto_find(u8 family, u8 protonum) +{ + return rcu_dereference(nf_nat_l4protos[family][protonum]); +} +EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find); + +#ifdef CONFIG_XFRM +static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + u8 family; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + + family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(family); + if (l3proto == NULL) + goto out; + + dir = CTINFO2DIR(ctinfo); + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + l3proto->decode_session(skb, ct, dir, statusbit, fl); +out: + rcu_read_unlock(); +} + +int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (xfrm_decode_session(skb, &fl, family) < 0) + return -1; + + dst = skb_dst(skb); + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) + return -1; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; + return 0; +} +EXPORT_SYMBOL(nf_xfrm_me_harder); +#endif /* CONFIG_XFRM */ + +/* We keep an extra hash for each conntrack, for fast searching. */ +static inline unsigned int +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) +{ + unsigned int hash; + + /* Original src, to ensure we map it consistently if poss. */ + hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), + tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + return ((u64)hash * net->ct.nat_htable_size) >> 32; +} + +/* Is this tuple already taken? (not by us) */ +int +nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + * incoming ones. NAT means they don't have a fixed mapping, + * so we invert the tuple and look for the incoming reply. + * + * We could keep a separate hash if this proves too slow. + */ + struct nf_conntrack_tuple reply; + + nf_ct_invert_tuplepr(&reply, tuple); + return nf_conntrack_tuple_taken(&reply, ignored_conntrack); +} +EXPORT_SYMBOL(nf_nat_used_tuple); + +/* If we source map this tuple so reply looks like reply_tuple, will + * that meet the constraints of range. + */ +static int in_range(const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range) +{ + /* If we are supposed to map IPs, then we must be in the + * range specified, otherwise let this drag us onto a new src IP. + */ + if (range->flags & NF_NAT_RANGE_MAP_IPS && + !l3proto->in_range(tuple, range)) + return 0; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || + l4proto->in_range(tuple, NF_NAT_MANIP_SRC, + &range->min_proto, &range->max_proto)) + return 1; + + return 0; +} + +static inline int +same_src(const struct nf_conn *ct, + const struct nf_conntrack_tuple *tuple) +{ + const struct nf_conntrack_tuple *t; + + t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + return (t->dst.protonum == tuple->dst.protonum && + nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) && + t->src.u.all == tuple->src.u.all); +} + +/* Only called for SRC manip */ +static int +find_appropriate_src(struct net *net, u16 zone, + const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *result, + const struct nf_nat_range *range) +{ + unsigned int h = hash_by_src(net, zone, tuple); + const struct nf_conn_nat *nat; + const struct nf_conn *ct; + const struct hlist_node *n; + + hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + ct = nat->ct; + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { + /* Copy source part from reply tuple. */ + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + if (in_range(l3proto, l4proto, result, range)) { + rcu_read_unlock(); + return 1; + } + } + } + return 0; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + * 1-65535, we don't do pro-rata allocation based on ports; we choose + * the ip with the lowest src-ip/dst-ip/proto usage. + */ +static void +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + const struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + union nf_inet_addr *var_ipp; + unsigned int i, max; + /* Host order */ + u32 minip, maxip, j, dist; + bool full_range; + + /* No IP mapping? Do nothing. */ + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + var_ipp = &tuple->src.u3; + else + var_ipp = &tuple->dst.u3; + + /* Fast path: only one choice. */ + if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) { + *var_ipp = range->min_addr; + return; + } + + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + max = sizeof(var_ipp->ip) / sizeof(u32) - 1; + else + max = sizeof(var_ipp->ip6) / sizeof(u32) - 1; + + /* Hashing source and destination IPs gives a fairly even + * spread in practice (if there are a small number of IPs + * involved, there usually aren't that many connections + * anyway). The consistency means that servers see the same + * client coming from the same IP (some Internet Banking sites + * like this), even across reboots. + */ + j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3), + range->flags & NF_NAT_RANGE_PERSISTENT ? + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + + full_range = false; + for (i = 0; i <= max; i++) { + /* If first bytes of the address are at the maximum, use the + * distance. Otherwise use the full range. + */ + if (!full_range) { + minip = ntohl((__force __be32)range->min_addr.all[i]); + maxip = ntohl((__force __be32)range->max_addr.all[i]); + dist = maxip - minip + 1; + } else { + minip = 0; + dist = ~0; + } + + var_ipp->all[i] = (__force __u32) + htonl(minip + (((u64)j * dist) >> 32)); + if (var_ipp->all[i] != range->max_addr.all[i]) + full_range = true; + + if (!(range->flags & NF_NAT_RANGE_PERSISTENT)) + j ^= (__force u32)tuple->dst.u3.all[i]; + } +} + +/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, + * we change the source to map into the range. For NF_INET_PRE_ROUTING + * and NF_INET_LOCAL_OUT, we change the destination to map into the + * range. It might not be possible to get a unique tuple, but we try. + * At worst (or if we race), we will end up with a final duplicate in + * __ip_conntrack_confirm and drop the packet. */ +static void +get_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig_tuple, + const struct nf_nat_range *range, + struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; + struct net *net = nf_ct_net(ct); + u16 zone = nf_ct_zone(ct); + + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); + l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, + orig_tuple->dst.protonum); + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + * and that same mapping gives a unique tuple within the given + * range, use that. + * + * This is only required for source (ie. NAT/masq) mappings. + * So far, we don't do local source mappings, so multiple + * manips not an issue. + */ + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + /* try the original tuple first */ + if (in_range(l3proto, l4proto, orig_tuple, range)) { + if (!nf_nat_used_tuple(orig_tuple, ct)) { + *tuple = *orig_tuple; + goto out; + } + } else if (find_appropriate_src(net, zone, l3proto, l4proto, + orig_tuple, tuple, range)) { + pr_debug("get_unique_tuple: Found current src map\n"); + if (!nf_nat_used_tuple(tuple, ct)) + goto out; + } + } + + /* 2) Select the least-used IP/proto combination in the given range */ + *tuple = *orig_tuple; + find_best_ips_proto(zone, tuple, range, ct, maniptype); + + /* 3) The per-protocol part of the manip is made to map into + * the range to make a unique tuple. + */ + + /* Only bother mapping if it's not already in range and unique */ + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { + if (l4proto->in_range(tuple, maniptype, + &range->min_proto, + &range->max_proto) && + (range->min_proto.all == range->max_proto.all || + !nf_nat_used_tuple(tuple, ct))) + goto out; + } else if (!nf_nat_used_tuple(tuple, ct)) { + goto out; + } + } + + /* Last change: get protocol to try to obtain unique tuple. */ + l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); +out: + rcu_read_unlock(); +} + +unsigned int +nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype) +{ + struct net *net = nf_ct_net(ct); + struct nf_conntrack_tuple curr_tuple, new_tuple; + struct nf_conn_nat *nat; + + /* nat helper or nfctnetlink also setup binding */ + nat = nfct_nat(ct); + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || + maniptype == NF_NAT_MANIP_DST); + BUG_ON(nf_nat_initialized(ct, maniptype)); + + /* What we've got will look like inverse of reply. Normally + * this is what is in the conntrack, except for prior + * manipulations (future optimization: if num_manips == 0, + * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) + */ + nf_ct_invert_tuplepr(&curr_tuple, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); + + if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { + struct nf_conntrack_tuple reply; + + /* Alter conntrack table so will recognize replies. */ + nf_ct_invert_tuplepr(&reply, &new_tuple); + nf_conntrack_alter_reply(ct, &reply); + + /* Non-atomic: we own this at the moment. */ + if (maniptype == NF_NAT_MANIP_SRC) + ct->status |= IPS_SRC_NAT; + else + ct->status |= IPS_DST_NAT; + } + + if (maniptype == NF_NAT_MANIP_SRC) { + unsigned int srchash; + + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + spin_lock_bh(&nf_nat_lock); + /* nf_conntrack_alter_reply might re-allocate extension aera */ + nat = nfct_nat(ct); + nat->ct = ct; + hlist_add_head_rcu(&nat->bysource, + &net->ct.nat_bysource[srchash]); + spin_unlock_bh(&nf_nat_lock); + } + + /* It's done. */ + if (maniptype == NF_NAT_MANIP_DST) + ct->status |= IPS_DST_NAT_DONE; + else + ct->status |= IPS_SRC_NAT_DONE; + + return NF_ACCEPT; +} +EXPORT_SYMBOL(nf_nat_setup_info); + +/* Do packet manipulations according to nf_nat_setup_info. */ +unsigned int nf_nat_packet(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff *skb) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned long statusbit; + enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); + + if (mtype == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + /* Non-atomic: these bits don't change. */ + if (ct->status & statusbit) { + struct nf_conntrack_tuple target; + + /* We are aiming to look like inverse of other direction. */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + + l3proto = __nf_nat_l3proto_find(target.src.l3num); + l4proto = __nf_nat_l4proto_find(target.src.l3num, + target.dst.protonum); + if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) + return NF_DROP; + } + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nf_nat_packet); + +struct nf_nat_proto_clean { + u8 l3proto; + u8 l4proto; + bool hash; +}; + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int nf_nat_proto_clean(struct nf_conn *i, void *data) +{ + const struct nf_nat_proto_clean *clean = data; + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || + (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) + return 0; + + if (clean->hash) { + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + spin_unlock_bh(&nf_nat_lock); + } else { + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | + IPS_SEQ_ADJUST); + } + return 0; +} + +static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + .l4proto = l4proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); + + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); +} + +static void nf_nat_l3proto_clean(u8 l3proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); + + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); +} + +/* Protocol registration. */ +int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) +{ + const struct nf_nat_l4proto **l4protos; + unsigned int i; + int ret = 0; + + mutex_lock(&nf_nat_proto_mutex); + if (nf_nat_l4protos[l3proto] == NULL) { + l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), + GFP_KERNEL); + if (l4protos == NULL) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < IPPROTO_MAX; i++) + RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); + + /* Before making proto_array visible to lockless readers, + * we must make sure its content is committed to memory. + */ + smp_wmb(); + + nf_nat_l4protos[l3proto] = l4protos; + } + + if (rcu_dereference_protected( + nf_nat_l4protos[l3proto][l4proto->l4proto], + lockdep_is_held(&nf_nat_proto_mutex) + ) != &nf_nat_l4proto_unknown) { + ret = -EBUSY; + goto out; + } + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); + out: + mutex_unlock(&nf_nat_proto_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); + +/* No one stores the protocol anywhere; simply delete it. */ +void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) +{ + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], + &nf_nat_l4proto_unknown); + mutex_unlock(&nf_nat_proto_mutex); + synchronize_rcu(); + + nf_nat_l4proto_clean(l3proto, l4proto->l4proto); +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister); + +int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) +{ + int err; + + err = nf_ct_l3proto_try_module_get(l3proto->l3proto); + if (err < 0) + return err; + + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], + &nf_nat_l4proto_tcp); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], + &nf_nat_l4proto_udp); + mutex_unlock(&nf_nat_proto_mutex); + + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_l3proto_register); + +void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto) +{ + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL); + mutex_unlock(&nf_nat_proto_mutex); + synchronize_rcu(); + + nf_nat_l3proto_clean(l3proto->l3proto); + nf_ct_l3proto_module_put(l3proto->l3proto); +} +EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); + +/* No one using conntrack by the time this called. */ +static void nf_nat_cleanup_conntrack(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (nat == NULL || nat->ct == NULL) + return; + + NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + spin_unlock_bh(&nf_nat_lock); +} + +static void nf_nat_move_storage(void *new, void *old) +{ + struct nf_conn_nat *new_nat = new; + struct nf_conn_nat *old_nat = old; + struct nf_conn *ct = old_nat->ct; + + if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) + return; + + spin_lock_bh(&nf_nat_lock); + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + spin_unlock_bh(&nf_nat_lock); +} + +static struct nf_ct_ext_type nat_extend __read_mostly = { + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .destroy = nf_nat_cleanup_conntrack, + .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, +}; + +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, +}; + +static int nfnetlink_parse_nat_proto(struct nlattr *attr, + const struct nf_conn *ct, + struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_PROTONAT_MAX+1]; + const struct nf_nat_l4proto *l4proto; + int err; + + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; + + l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->nlattr_to_range) + err = l4proto->nlattr_to_range(tb, range); + + return err; +} + +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_PROTO] = { .type = NLA_NESTED }, +}; + +static int +nfnetlink_parse_nat(const struct nlattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) +{ + const struct nf_nat_l3proto *l3proto; + struct nlattr *tb[CTA_NAT_MAX+1]; + int err; + + memset(range, 0, sizeof(*range)); + + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; + + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) { + err = -EAGAIN; + goto out; + } + err = l3proto->nlattr_to_range(tb, range); + if (err < 0) + goto out; + + if (!tb[CTA_NAT_PROTO]) + goto out; + + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); +out: + rcu_read_unlock(); + return err; +} + +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + const struct nlattr *attr) +{ + struct nf_nat_range range; + int err; + + err = nfnetlink_parse_nat(attr, ct, &range); + if (err < 0) + return err; + if (nf_nat_initialized(ct, manip)) + return -EEXIST; + + return nf_nat_setup_info(ct, &range, manip); +} +#else +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + const struct nlattr *attr) +{ + return -EOPNOTSUPP; +} +#endif + +static int __net_init nf_nat_net_init(struct net *net) +{ + /* Leave them the same for the moment. */ + net->ct.nat_htable_size = net->ct.htable_size; + net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); + if (!net->ct.nat_bysource) + return -ENOMEM; + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + struct nf_nat_proto_clean clean = {}; + + nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + synchronize_rcu(); + nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + +static struct nf_ct_helper_expectfn follow_master_nat = { + .name = "nat-follow-master", + .expectfn = nf_nat_follow_master, +}; + +static struct nfq_ct_nat_hook nfq_ct_nat = { + .seq_adjust = nf_nat_tcp_seq_adjust, +}; + +static int __init nf_nat_init(void) +{ + int ret; + + ret = nf_ct_extend_register(&nat_extend); + if (ret < 0) { + printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + return ret; + } + + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) + goto cleanup_extend; + + nf_ct_helper_expectfn_register(&follow_master_nat); + + /* Initialize fake conntrack so that NAT will skip it */ + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); + + BUG_ON(nf_nat_seq_adjust_hook != NULL); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, + nfnetlink_parse_nat_setup); + BUG_ON(nf_ct_nat_offset != NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); + RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); +#ifdef CONFIG_XFRM + BUG_ON(nf_nat_decode_session_hook != NULL); + RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); +#endif + return 0; + + cleanup_extend: + nf_ct_extend_unregister(&nat_extend); + return ret; +} + +static void __exit nf_nat_cleanup(void) +{ + unsigned int i; + + unregister_pernet_subsys(&nf_nat_net_ops); + nf_ct_extend_unregister(&nat_extend); + nf_ct_helper_expectfn_unregister(&follow_master_nat); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); +#ifdef CONFIG_XFRM + RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); +#endif + for (i = 0; i < NFPROTO_NUMPROTO; i++) + kfree(nf_nat_l4protos[i]); + synchronize_net(); +} + +MODULE_LICENSE("GPL"); + +module_init(nf_nat_init); +module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c new file mode 100644 index 000000000000..23c2b38676a6 --- /dev/null +++ b/net/netfilter/nf_nat_helper.c @@ -0,0 +1,435 @@ +/* nf_nat_helper.c - generic support functions for NAT helpers + * + * (C) 2000-2002 Harald Welte + * (C) 2003-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DUMP_OFFSET(x) \ + pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ + x->offset_before, x->offset_after, x->correction_pos); + +static DEFINE_SPINLOCK(nf_nat_seqofs_lock); + +/* Setup TCP sequence correction given this change at this sequence */ +static inline void +adjust_tcp_sequence(u32 seq, + int sizediff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way = &nat->seq[dir]; + + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + seq, sizediff); + + pr_debug("adjust_tcp_sequence: Seq_offset before: "); + DUMP_OFFSET(this_way); + + spin_lock_bh(&nf_nat_seqofs_lock); + + /* SYN adjust. If it's uninitialized, or this is after last + * correction, record it: we don't handle more than one + * adjustment in the window, but do deal with common case of a + * retransmit */ + if (this_way->offset_before == this_way->offset_after || + before(this_way->correction_pos, seq)) { + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += sizediff; + } + spin_unlock_bh(&nf_nat_seqofs_lock); + + pr_debug("adjust_tcp_sequence: Seq_offset after: "); + DUMP_OFFSET(this_way); +} + +/* Get the offset value, for conntrack */ +s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way; + s16 offset; + + if (!nat) + return 0; + + this_way = &nat->seq[dir]; + spin_lock_bh(&nf_nat_seqofs_lock); + offset = after(seq, this_way->correction_pos) + ? this_way->offset_after : this_way->offset_before; + spin_unlock_bh(&nf_nat_seqofs_lock); + + return offset; +} + +/* Frobs data inside this packet, which is linear. */ +static void mangle_contents(struct sk_buff *skb, + unsigned int dataoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + unsigned char *data; + + BUG_ON(skb_is_nonlinear(skb)); + data = skb_network_header(skb) + dataoff; + + /* move post-replacement */ + memmove(data + match_offset + rep_len, + data + match_offset + match_len, + skb->tail - (skb->network_header + dataoff + + match_offset + match_len)); + + /* insert data from buffer */ + memcpy(data + match_offset, rep_buffer, rep_len); + + /* update skb info */ + if (rep_len > match_len) { + pr_debug("nf_nat_mangle_packet: Extending packet by " + "%u from %u bytes\n", rep_len - match_len, skb->len); + skb_put(skb, rep_len - match_len); + } else { + pr_debug("nf_nat_mangle_packet: Shrinking packet from " + "%u from %u bytes\n", match_len - rep_len, skb->len); + __skb_trim(skb, skb->len + rep_len - match_len); + } + + if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + /* fix IP hdr checksum information */ + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + } else + ipv6_hdr(skb)->payload_len = + htons(skb->len - sizeof(struct ipv6hdr)); +} + +/* Unusual, but possible case. */ +static int enlarge_skb(struct sk_buff *skb, unsigned int extra) +{ + if (skb->len + extra > 65535) + return 0; + + if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) + return 0; + + return 1; +} + +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + +void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); + +/* Generic function for mangling variable-length address changes inside + * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX + * command in FTP). + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * */ +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) +{ + const struct nf_nat_l3proto *l3proto; + struct tcphdr *tcph; + int oldlen, datalen; + + if (!skb_make_writable(skb, skb->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) + return 0; + + SKB_LINEAR_ASSERT(skb); + + tcph = (void *)skb->data + protoff; + + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + tcph->doff*4, + match_offset, match_len, rep_buffer, rep_len); + + datalen = skb->len - protoff; + + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check, + datalen, oldlen); + + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + + return 1; +} +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); + +/* Generic function for mangling variable-length address changes inside + * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX + * command in the Amanda protocol) + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * XXX - This function could be merged with nf_nat_mangle_tcp_packet which + * should be fairly easy to do. + */ +int +nf_nat_mangle_udp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + const struct nf_nat_l3proto *l3proto; + struct udphdr *udph; + int datalen, oldlen; + + if (!skb_make_writable(skb, skb->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) + return 0; + + udph = (void *)skb->data + protoff; + + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + sizeof(*udph), + match_offset, match_len, rep_buffer, rep_len); + + /* update the length of the UDP packet */ + datalen = skb->len - protoff; + udph->len = htons(datalen); + + /* fix udp checksum if udp checksum was previously calculated */ + if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) + return 1; + + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, + datalen, oldlen); + + return 1; +} +EXPORT_SYMBOL(nf_nat_mangle_udp_packet); + +/* Adjust one found SACK option including checksum correction */ +static void +sack_adjust(struct sk_buff *skb, + struct tcphdr *tcph, + unsigned int sackoff, + unsigned int sackend, + struct nf_nat_seq *natseq) +{ + while (sackoff < sackend) { + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; + + sack = (void *)skb->data + sackoff; + if (after(ntohl(sack->start_seq) - natseq->offset_before, + natseq->correction_pos)) + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); + else + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); + + if (after(ntohl(sack->end_seq) - natseq->offset_before, + natseq->correction_pos)) + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); + else + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); + + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); + + inet_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + inet_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); + sack->start_seq = new_start_seq; + sack->end_seq = new_end_seq; + sackoff += sizeof(*sack); + } +} + +/* TCP SACK sequence number adjustment */ +static inline unsigned int +nf_nat_sack_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *tcph, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dir, optoff, optend; + struct nf_conn_nat *nat = nfct_nat(ct); + + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + tcph->doff * 4; + + if (!skb_make_writable(skb, optend)) + return 0; + + dir = CTINFO2DIR(ctinfo); + + while (optoff < optend) { + /* Usually: option, length. */ + unsigned char *op = skb->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + /* no partial options */ + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_SACK && + op[1] >= 2+TCPOLEN_SACK_PERBLOCK && + ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) + sack_adjust(skb, tcph, optoff+2, + optoff+op[1], &nat->seq[!dir]); + optoff += op[1]; + } + } + return 1; +} + +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ +int +nf_nat_seq_adjust(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff) +{ + struct tcphdr *tcph; + int dir; + __be32 newseq, newack; + s16 seqoff, ackoff; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way, *other_way; + + dir = CTINFO2DIR(ctinfo); + + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; + + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) + return 0; + + tcph = (void *)skb->data + protoff; + if (after(ntohl(tcph->seq), this_way->correction_pos)) + seqoff = this_way->offset_after; + else + seqoff = this_way->offset_before; + + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, + other_way->correction_pos)) + ackoff = other_way->offset_after; + else + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); + + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); + + tcph->seq = newseq; + tcph->ack_seq = newack; + + return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); +} + +/* Setup NAT on this expected conntrack so it follows master. */ +/* If we fail to get a free NAT slot, we'll get dropped on confirm */ +void nf_nat_follow_master(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c new file mode 100644 index 000000000000..9baaf734c142 --- /dev/null +++ b/net/netfilter/nf_nat_proto_common.c @@ -0,0 +1,112 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + if (maniptype == NF_NAT_MANIP_SRC) + port = tuple->src.u.all; + else + port = tuple->dst.u.all; + + return ntohs(port) >= ntohs(min->all) && + ntohs(port) <= ntohs(max->all); +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range); + +void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct, + u16 *rover) +{ + unsigned int range_size, min, i; + __be16 *portptr; + u_int16_t off; + + if (maniptype == NF_NAT_MANIP_SRC) + portptr = &tuple->src.u.all; + else + portptr = &tuple->dst.u.all; + + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == NF_NAT_MANIP_DST) + return; + + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr) < 512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min_proto.all); + range_size = ntohs(range->max_proto.all) - min + 1; + } + + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; + + for (i = 0; ; ++off) { + *portptr = htons(min + off % range_size); + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) + continue; + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + *rover = off; + return; + } + return; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); + +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_PROTONAT_PORT_MIN]) { + range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); + range->max_proto.all = range->min_proto.all; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + if (tb[CTA_PROTONAT_PORT_MAX]) { + range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range); +#endif diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c new file mode 100644 index 000000000000..c8be2cdac0bf --- /dev/null +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -0,0 +1,116 @@ +/* + * DCCP NAT protocol helper + * + * Copyright (c) 2005, 2006, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static u_int16_t dccp_port_rover; + +static void +dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &dccp_port_rover); +} + +static bool +dccp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct dccp_hdr *hdr; + __be16 *portptr, oldport, newport; + int hdrsize = 8; /* DCCP connection tracking guarantees this much */ + + if (skb->len >= hdroff + sizeof(struct dccp_hdr)) + hdrsize = sizeof(struct dccp_hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct dccp_hdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + newport = tuple->src.u.dccp.port; + portptr = &hdr->dccph_sport; + } else { + newport = tuple->dst.u.dccp.port; + portptr = &hdr->dccph_dport; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, + 0); + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { + .l4proto = IPPROTO_DCCP, + .manip_pkt = dccp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = dccp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_dccp_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); +err1: + return err; +} + +static void __exit nf_nat_proto_dccp_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + +} + +module_init(nf_nat_proto_dccp_init); +module_exit(nf_nat_proto_dccp_fini); + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("DCCP NAT protocol helper"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c new file mode 100644 index 000000000000..e64faa5ca893 --- /dev/null +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include + +static u_int16_t nf_sctp_port_rover; + +static void +sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &nf_sctp_port_rover); +} + +static bool +sctp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct sk_buff *frag; + sctp_sctphdr_t *hdr; + __be32 crc32; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct sctphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + hdr->source = tuple->src.u.sctp.port; + } else { + /* Get rid of dst port */ + hdr->dest = tuple->dst.u.sctp.port; + } + + crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); + skb_walk_frags(skb, frag) + crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), + crc32); + crc32 = sctp_end_cksum(crc32); + hdr->checksum = crc32; + + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { + .l4proto = IPPROTO_SCTP, + .manip_pkt = sctp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = sctp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_sctp_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); +err1: + return err; +} + +static void __exit nf_nat_proto_sctp_exit(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); +} + +module_init(nf_nat_proto_sctp_init); +module_exit(nf_nat_proto_sctp_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SCTP NAT protocol helper"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c new file mode 100644 index 000000000000..83ec8a6e4c36 --- /dev/null +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -0,0 +1,85 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static u16 tcp_port_rover; + +static void +tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &tcp_port_rover); +} + +static bool +tcp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct tcphdr *hdr; + __be16 *portptr, newport, oldport; + int hdrsize = 8; /* TCP connection tracking guarantees this much */ + + /* this could be a inner header returned in icmp packet; in such + cases we cannot update the checksum field since it is outside of + the 8 bytes of transport layer headers we are guaranteed */ + if (skb->len >= hdroff + sizeof(struct tcphdr)) + hdrsize = sizeof(struct tcphdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct tcphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.tcp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.tcp.port; + portptr = &hdr->dest; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_tcp = { + .l4proto = IPPROTO_TCP, + .manip_pkt = tcp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = tcp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c new file mode 100644 index 000000000000..7df613fb34a2 --- /dev/null +++ b/net/netfilter/nf_nat_proto_udp.c @@ -0,0 +1,76 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static u16 udp_port_rover; + +static void +udp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udp_port_rover); +} + +static bool +udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + __be16 *portptr, newport; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + hdr = (struct udphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + l3proto->csum_update(skb, iphdroff, &hdr->check, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, + 0); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + } + *portptr = newport; + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_udp = { + .l4proto = IPPROTO_UDP, + .manip_pkt = udp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c new file mode 100644 index 000000000000..776a0d1317b1 --- /dev/null +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -0,0 +1,106 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static u16 udplite_port_rover; + +static void +udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udplite_port_rover); +} + +static bool +udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + __be16 *portptr, newport; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of source port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + + *portptr = newport; + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { + .l4proto = IPPROTO_UDPLITE, + .manip_pkt = udplite_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udplite_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_udplite_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); +err1: + return err; +} + +static void __exit nf_nat_proto_udplite_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); +} + +module_init(nf_nat_proto_udplite_init); +module_exit(nf_nat_proto_udplite_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c new file mode 100644 index 000000000000..6e494d584412 --- /dev/null +++ b/net/netfilter/nf_nat_proto_unknown.c @@ -0,0 +1,54 @@ +/* The "unknown" protocol. This is what is used for protocols we + * don't understand. It's returned by ip_ct_find_proto(). + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type manip_type, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return true; +} + +static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + /* Sorry: we can't help you; if it's not unique, we can't frob + * anything. + */ + return; +} + +static bool +unknown_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_unknown = { + .manip_pkt = unknown_manip_pkt, + .in_range = unknown_in_range, + .unique_tuple = unknown_unique_tuple, +}; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c new file mode 100644 index 000000000000..7521368a6034 --- /dev/null +++ b/net/netfilter/xt_nat.c @@ -0,0 +1,167 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->rangesize != 1) { + pr_info("%s: multiple ranges no longer supported\n", + par->target->name); + return -EINVAL; + } + return 0; +} + +static void xt_nat_convert_range(struct nf_nat_range *dst, + const struct nf_nat_ipv4_range *src) +{ + memset(&dst->min_addr, 0, sizeof(dst->min_addr)); + memset(&dst->max_addr, 0, sizeof(dst->max_addr)); + + dst->flags = src->flags; + dst->min_addr.ip = src->min_ip; + dst->max_addr.ip = src->max_ip; + dst->min_proto = src->min; + dst->max_proto = src->max; +} + +static unsigned int +xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +static unsigned int +xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST); +} + +static struct xt_target xt_nat_target_reg[] __read_mostly = { + { + .name = "SNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_snat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_dnat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, + { + .name = "SNAT", + .revision = 1, + .target = xt_snat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 1, + .target = xt_dnat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +}; + +static int __init xt_nat_init(void) +{ + return xt_register_targets(xt_nat_target_reg, + ARRAY_SIZE(xt_nat_target_reg)); +} + +static void __exit xt_nat_exit(void) +{ + xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg)); +} + +module_init(xt_nat_init); +module_exit(xt_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_SNAT"); +MODULE_ALIAS("ipt_DNAT"); -- cgit v1.2.1 From 0ad352cb433ed2b05921a32b5ee20410512e2320 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:08 +0200 Subject: netfilter: ipv6: expand skb head in ip6_route_me_harder after oif change Expand the skb headroom if the oif changed due to rerouting similar to how IPv4 packets are handled. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index db31561cc8df..429089cb073d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, @@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) + return -1; + return 0; } EXPORT_SYMBOL(ip6_route_me_harder); -- cgit v1.2.1 From 2cf545e835aae92173ef0b1f4af385e9c40f21e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:10 +0200 Subject: net: core: add function for incremental IPv6 pseudo header checksum updates Add inet_proto_csum_replace16 for incrementally updating IPv6 pseudo header checksums for IPv6 NAT. Signed-off-by: Patrick McHardy Acked-by: David S. Miller --- net/core/utils.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/core/utils.c b/net/core/utils.c index 39895a65e54a..f5613d569c23 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, + const __be32 *from, const __be32 *to, + int pseudohdr) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial(diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial(diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), + csum_unfold(*sum))); +} +EXPORT_SYMBOL(inet_proto_csum_replace16); + int mac_pton(const char *s, u8 *mac) { int i; -- cgit v1.2.1 From 58a317f1061c894d2344c0b6a18ab4a64b69b815 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:12 +0200 Subject: netfilter: ipv6: add IPv6 NAT support Signed-off-by: Patrick McHardy --- net/core/secure_seq.c | 1 + net/ipv6/netfilter/Kconfig | 12 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/ip6table_nat.c | 321 +++++++++++++++++++++++++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 37 ++- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 287 ++++++++++++++++++++++ net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 90 +++++++ net/netfilter/nf_nat_core.c | 2 + net/netfilter/xt_nat.c | 3 + 9 files changed, 755 insertions(+), 2 deletions(-) create mode 100644 net/ipv6/netfilter/ip6table_nat.c create mode 100644 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c create mode 100644 net/ipv6/netfilter/nf_nat_proto_icmpv6.c (limited to 'net') diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 99b2596531bb..e61a8bb7fce7 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, return hash[0]; } +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 10135342799e..b27e0ad4b738 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,18 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 534d3f216f7b..76779376da4c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o +obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o # objects for l3 independent conntrack nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o @@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o +nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o +obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o + # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c new file mode 100644 index 000000000000..e418bd6350a4 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT + * funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv6_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV6, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv6_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + hooknum, hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv6_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv6_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv6_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_NAT_SRC, + }, +}; + +static int __net_init ip6table_nat_net_init(struct net *net) +{ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + kfree(repl); + if (IS_ERR(net->ipv6.ip6table_nat)) + return PTR_ERR(net->ipv6.ip6table_nat); + return 0; +} + +static void __net_exit ip6table_nat_net_exit(struct net *net) +{ + ip6t_unregister_table(net, net->ipv6.ip6table_nat); +} + +static struct pernet_operations ip6table_nat_net_ops = { + .init = ip6table_nat_net_init, + .exit = ip6table_nat_net_exit, +}; + +static int __init ip6table_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&ip6table_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&ip6table_nat_net_ops); +err1: + return err; +} + +static void __exit ip6table_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + unregister_pernet_subsys(&ip6table_nat_net_ops); +} + +module_init(ip6table_nat_init); +module_exit(ip6table_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dcf6010f68d9..8860d23e61cf 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + int protoff; + __be16 frag_off; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + goto out; + } + + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + typeof(nf_nat_seq_adjust_hook) seq_adjust; + + seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } +out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -170,12 +201,14 @@ static unsigned int __ipv6_conntrack_in(struct net *net, } /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. + * POST_ROUTING hook. In case of unconfirmed connections NAT + * might reassign a helper, so the entire packet is also + * required. */ ct = nf_ct_get(reasm, &ctinfo); if (ct != NULL && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); - if (help && help->helper) { + if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { nf_conntrack_get_reasm(skb); NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, (struct net_device *)in, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c new file mode 100644 index 000000000000..81a2d1c3da8e --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of IPv6 NAT funded by Astaro. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv6_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi6 *fl6 = &fl->u.ip6; + + if (ct->status & statusbit) { + fl6->daddr = t->dst.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl6->saddr = t->src.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_sport = t->src.u.all; + } +} +#endif + +static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport); +} + +static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct ipv6hdr *ipv6h; + __be16 frag_off; + int hdroff; + u8 nexthdr; + + if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) + return false; + + ipv6h = (void *)skb->data + iphdroff; + nexthdr = ipv6h->nexthdr; + hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), + &nexthdr, &frag_off); + if (hdroff < 0) + goto manip_addr; + + if ((frag_off & htons(~0x7)) == 0 && + !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, + target, maniptype)) + return false; +manip_addr: + if (maniptype == NF_NAT_MANIP_SRC) + ipv6h->saddr = target->src.u3.in6; + else + ipv6h->daddr = target->dst.u3.in6; + + return true; +} + +static void nf_nat_ipv6_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); + const struct in6_addr *oldip, *newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = &ipv6h->saddr; + newip = &t->src.u3.in6; + } else { + oldip = &ipv6h->daddr; + newip = &t->dst.u3.in6; + } + inet_proto_csum_replace16(check, skb, oldip->s6_addr32, + newip->s6_addr32, 1); +} + +static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt6i_flags & RTF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V6_MINIP]) { + nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], + sizeof(struct in6_addr)); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V6_MAXIP]) + nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], + sizeof(struct in6_addr)); + else + range->max_addr = range->min_addr; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { + .l3proto = NFPROTO_IPV6, + .secure_port = nf_nat_ipv6_secure_port, + .in_range = nf_nat_ipv6_in_range, + .manip_pkt = nf_nat_ipv6_manip_pkt, + .csum_update = nf_nat_ipv6_csum_update, + .csum_recalc = nf_nat_ipv6_csum_recalc, + .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv6_decode_session, +#endif +}; + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen) +{ + struct { + struct icmp6hdr icmp6; + struct ipv6hdr ip6; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); + if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + inside = (void *)skb->data + hdrlen; + inside->icmp6.icmp6_cksum = 0; + inside->icmp6.icmp6_cksum = + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + skb->len - hdrlen, IPPROTO_ICMPV6, + csum_partial(&inside->icmp6, + skb->len - hdrlen, 0)); + } + + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); + if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); + +static int __init nf_nat_l3proto_ipv6_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv6_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); + +module_init(nf_nat_l3proto_ipv6_init); +module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c new file mode 100644 index 000000000000..5d6da784305b --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 Patrick Mchardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static bool +icmpv6_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u16 id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xffff; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST || + hdr->icmp6_code == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, 0); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { + .l4proto = IPPROTO_ICMPV6, + .manip_pkt = icmpv6_manip_pkt, + .in_range = icmpv6_in_range, + .unique_tuple = icmpv6_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c577b753fb9a..29d445235199 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -696,6 +696,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) }, + [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) }, [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 7521368a6034..81aafa8e4fef 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -163,5 +163,8 @@ module_init(xt_nat_init); module_exit(xt_nat_exit); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); +MODULE_ALIAS("ip6t_SNAT"); +MODULE_ALIAS("ip6t_DNAT"); -- cgit v1.2.1 From b3f644fc8232ca761da0b5c5ccb6f30b423c4302 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:14 +0200 Subject: netfilter: ip6tables: add MASQUERADE target Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv6/addrconf.c | 2 +- net/ipv6/netfilter/Kconfig | 12 ++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_MASQUERADE.c | 135 +++++++++++++++++++++++++++++++++++ 5 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 net/ipv6/netfilter/ip6t_MASQUERADE.c (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 1c3aa28b51ae..5d5d4d1be9c2 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -99,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex) if (!nat) return 0; - + if (nf_ct_l3num(i) != NFPROTO_IPV4) + return 0; return nat->masq_index == (int)(long)ifindex; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e581009cb09e..19d4bffda9d7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1093,7 +1093,7 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, +int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index b27e0ad4b738..54a5032ea3df 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -144,6 +144,18 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. +config IP6_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + depends on NF_NAT_IPV6 + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 76779376da4c..068bad199587 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -34,4 +34,5 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets +obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c new file mode 100644 index 000000000000..60e9053bab05 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct in6_addr src; + struct nf_conn *ct; + struct nf_nat_range newrange; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + if (ipv6_dev_get_saddr(dev_net(par->out), par->out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) + return NF_DROP; + + nfct_nat(ct)->masq_index = par->out->ifindex; + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = src; + newrange.max_addr.in6 = src; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} + +static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +static int device_cmp(struct nf_conn *ct, void *ifindex) +{ + const struct nf_conn_nat *nat = nfct_nat(ct); + + if (!nat) + return 0; + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + return 0; + return nat->masq_index == (int)(long)ifindex; +} + +static int masq_device_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + const struct net_device *dev = ptr; + struct net *net = dev_net(dev); + + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + .notifier_call = masq_device_event, +}; + +static int masq_inet_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + + return masq_device_event(this, event, ifa->idev->dev); +} + +static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, +}; + +static struct xt_target masquerade_tg6_reg __read_mostly = { + .name = "MASQUERADE", + .family = NFPROTO_IPV6, + .checkentry = masquerade_tg6_checkentry, + .target = masquerade_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .me = THIS_MODULE, +}; + +static int __init masquerade_tg6_init(void) +{ + int err; + + err = xt_register_target(&masquerade_tg6_reg); + if (err == 0) { + register_netdevice_notifier(&masq_dev_notifier); + register_inet6addr_notifier(&masq_inet_notifier); + } + + return err; +} +static void __exit masquerade_tg6_exit(void) +{ + unregister_inet6addr_notifier(&masq_inet_notifier); + unregister_netdevice_notifier(&masq_dev_notifier); + xt_unregister_target(&masquerade_tg6_reg); +} + +module_init(masquerade_tg6_init); +module_exit(masquerade_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: automatic address SNAT"); -- cgit v1.2.1 From 115e23ac78f87b632b5406e9d504fd56d17ffef1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:16 +0200 Subject: netfilter: ip6tables: add REDIRECT target Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/Kconfig | 11 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_REDIRECT.c | 98 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 net/ipv6/netfilter/ip6t_REDIRECT.c (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 54a5032ea3df..585590f16f8c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -156,6 +156,17 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NF_NAT_IPV6 + help + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 068bad199587..e30a531d40ce 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,4 +35,5 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o +obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_REDIRECT.c b/net/ipv6/netfilter/ip6t_REDIRECT.c new file mode 100644 index 000000000000..60497a3c6004 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REDIRECT.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +static unsigned int +redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = loopback_addr; + else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +static struct xt_target redirect_tg6_reg __read_mostly = { + .name = "REDIRECT", + .family = NFPROTO_IPV6, + .checkentry = redirect_tg6_checkentry, + .target = redirect_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, +}; + +static int __init redirect_tg6_init(void) +{ + return xt_register_target(&redirect_tg6_reg); +} + +static void __exit redirect_tg6_exit(void) +{ + xt_unregister_target(&redirect_tg6_reg); +} + +module_init(redirect_tg6_init); +module_exit(redirect_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); -- cgit v1.2.1 From ed72d9e294a66fce8f4b4a2f6c8c011b22f1a87c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:18 +0200 Subject: netfilter: ip6tables: add NETMAP target Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/Kconfig | 10 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NETMAP.c | 94 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 net/ipv6/netfilter/ip6t_NETMAP.c (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 585590f16f8c..7bdf73be9a99 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -156,6 +156,16 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NETMAP + tristate "NETMAP target support" + depends on NF_NAT_IPV6 + help + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index e30a531d40ce..0864ce601651 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,5 +35,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o +obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NETMAP.c b/net/ipv6/netfilter/ip6t_NETMAP.c new file mode 100644 index 000000000000..4f3bf360e50f --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NETMAP.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Svenning Soerensen's IPv4 NETMAP target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + union nf_inet_addr new_addr, netmask; + unsigned int i; + + ct = nf_ct_get(skb, &ctinfo); + for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) + netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ + range->max_addr.ip6[i]); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_addr.in6 = ipv6_hdr(skb)->daddr; + else + new_addr.in6 = ipv6_hdr(skb)->saddr; + + for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { + new_addr.ip6[i] &= ~netmask.ip6[i]; + new_addr.ip6[i] |= range->min_addr.ip6[i] & + netmask.ip6[i]; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = new_addr; + newrange.max_addr = new_addr; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return -EINVAL; + return 0; +} + +static struct xt_target netmap_tg6_reg __read_mostly = { + .name = "NETMAP", + .family = NFPROTO_IPV6, + .target = netmap_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg6_checkentry, + .me = THIS_MODULE, +}; + +static int __init netmap_tg6_init(void) +{ + return xt_register_target(&netmap_tg6_reg); +} + +static void netmap_tg6_exit(void) +{ + xt_unregister_target(&netmap_tg6_reg); +} + +module_init(netmap_tg6_init); +module_exit(netmap_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv6 subnets"); +MODULE_AUTHOR("Patrick McHardy "); -- cgit v1.2.1 From d33cbeeb1a46a7dc82fe9f53e40a742ce0c67c79 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:20 +0200 Subject: netfilter: nf_nat: support IPv6 in FTP NAT helper Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 5 -- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_ftp.c | 137 ------------------------------------- net/netfilter/Kconfig | 5 ++ net/netfilter/Makefile | 3 + net/netfilter/nf_conntrack_ftp.c | 3 +- net/netfilter/nf_nat_ftp.c | 143 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 152 insertions(+), 145 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_nat_ftp.c create mode 100644 net/netfilter/nf_nat_ftp.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index b26629681bdb..8c6c6920b57a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -221,11 +221,6 @@ config NF_NAT_PROTO_GRE tristate depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE -config NF_NAT_FTP - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_FTP - config NF_NAT_IRC tristate depends on NF_CONNTRACK && NF_NAT_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 0ea3acc510e2..4d8a4ad7816e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o -obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c deleted file mode 100644 index dd5e387fc03b..000000000000 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ /dev/null @@ -1,137 +0,0 @@ -/* FTP extension for TCP NAT alteration. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Rusty Russell "); -MODULE_DESCRIPTION("ftp NAT helper"); -MODULE_ALIAS("ip_nat_ftp"); - -/* FIXME: Time out? --RR */ - -static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, - char *buffer, size_t buflen, - __be32 addr, u16 port) -{ - switch (type) { - case NF_CT_FTP_PORT: - case NF_CT_FTP_PASV: - return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", - ((unsigned char *)&addr)[0], - ((unsigned char *)&addr)[1], - ((unsigned char *)&addr)[2], - ((unsigned char *)&addr)[3], - port >> 8, - port & 0xFF); - case NF_CT_FTP_EPRT: - return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); - case NF_CT_FTP_EPSV: - return snprintf(buffer, buflen, "|||%u|", port); - } - - return 0; -} - -/* So, this packet has hit the connection tracking matching code. - Mangle it, and change the expectation to match the new version. */ -static unsigned int nf_nat_ftp(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) -{ - __be32 newip; - u_int16_t port; - int dir = CTINFO2DIR(ctinfo); - struct nf_conn *ct = exp->master; - char buffer[sizeof("|1|255.255.255.255|65535|")]; - unsigned int buflen; - - pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); - - /* Connection will come from wherever this packet goes, hence !dir */ - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = !dir; - - /* When you see the packet, we need to NAT it the same as the - * this one. */ - exp->expectfn = nf_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - - if (port == 0) - return NF_DROP; - - buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); - if (!buflen) - goto out; - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); - - if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, - matchlen, buffer, buflen)) - goto out; - - return NF_ACCEPT; - -out: - nf_ct_unexpect_related(exp); - return NF_DROP; -} - -static void __exit nf_nat_ftp_fini(void) -{ - RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); - synchronize_rcu(); -} - -static int __init nf_nat_ftp_init(void) -{ - BUG_ON(nf_nat_ftp_hook != NULL); - RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); - return 0; -} - -/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) -{ - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); - return 0; -} -module_param_call(ports, warn_set, NULL, NULL, 0); - -module_init(nf_nat_ftp_init); -module_exit(nf_nat_ftp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 91adddae20a4..310449450634 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -380,6 +380,11 @@ config NF_NAT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP select LIBCRC32C +config NF_NAT_FTP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_FTP + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 09c9451bc510..16592b1e5344 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -54,6 +54,9 @@ obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +# NAT helpers +obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o + # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index c0f4a5ba9016..f8cc26ad4456 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -488,8 +488,7 @@ static int help(struct sk_buff *skb, /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); - if (nf_nat_ftp && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_ftp && ct->status & IPS_NAT_MASK) ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, protoff, matchoff, matchlen, exp); else { diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c new file mode 100644 index 000000000000..e839b97b2863 --- /dev/null +++ b/net/netfilter/nf_nat_ftp.c @@ -0,0 +1,143 @@ +/* FTP extension for TCP NAT alteration. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell "); +MODULE_DESCRIPTION("ftp NAT helper"); +MODULE_ALIAS("ip_nat_ftp"); + +/* FIXME: Time out? --RR */ + +static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type, + char *buffer, size_t buflen, + union nf_inet_addr *addr, u16 port) +{ + switch (type) { + case NF_CT_FTP_PORT: + case NF_CT_FTP_PASV: + return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", + ((unsigned char *)&addr->ip)[0], + ((unsigned char *)&addr->ip)[1], + ((unsigned char *)&addr->ip)[2], + ((unsigned char *)&addr->ip)[3], + port >> 8, + port & 0xFF); + case NF_CT_FTP_EPRT: + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return snprintf(buffer, buflen, "|1|%pI4|%u|", + &addr->ip, port); + else + return snprintf(buffer, buflen, "|2|%pI6|%u|", + &addr->ip6, port); + case NF_CT_FTP_EPSV: + return snprintf(buffer, buflen, "|||%u|", port); + } + + return 0; +} + +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int nf_nat_ftp(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) +{ + union nf_inet_addr newaddr; + u_int16_t port; + int dir = CTINFO2DIR(ctinfo); + struct nf_conn *ct = exp->master; + char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN]; + unsigned int buflen; + + pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); + + /* Connection will come from wherever this packet goes, hence !dir */ + newaddr = ct->tuplehash[!dir].tuple.dst.u3; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = !dir; + + /* When you see the packet, we need to NAT it the same as the + * this one. */ + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + port = 0; + break; + } + } + + if (port == 0) + return NF_DROP; + + buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer), + &newaddr, port); + if (!buflen) + goto out; + + pr_debug("calling nf_nat_mangle_tcp_packet\n"); + + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, + matchlen, buffer, buflen)) + goto out; + + return NF_ACCEPT; + +out: + nf_ct_unexpect_related(exp); + return NF_DROP; +} + +static void __exit nf_nat_ftp_fini(void) +{ + RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_ftp_init(void) +{ + BUG_ON(nf_nat_ftp_hook != NULL); + RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); + return 0; +} + +/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +static int warn_set(const char *val, struct kernel_param *kp) +{ + printk(KERN_INFO KBUILD_MODNAME + ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + return 0; +} +module_param_call(ports, warn_set, NULL, NULL, 0); + +module_init(nf_nat_ftp_init); +module_exit(nf_nat_ftp_fini); -- cgit v1.2.1 From ee6eb96673704225164f0ba7462e1973ce10885c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:22 +0200 Subject: netfilter: nf_nat: support IPv6 in amanda NAT helper Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 5 --- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_amanda.c | 85 ------------------------------------- net/netfilter/Kconfig | 5 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_amanda.c | 3 +- net/netfilter/nf_nat_amanda.c | 85 +++++++++++++++++++++++++++++++++++++ 7 files changed, 92 insertions(+), 93 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_nat_amanda.c create mode 100644 net/netfilter/nf_nat_amanda.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 8c6c6920b57a..52c4a87007aa 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -231,11 +231,6 @@ config NF_NAT_TFTP depends on NF_CONNTRACK && NF_NAT_IPV4 default NF_NAT_IPV4 && NF_CONNTRACK_TFTP -config NF_NAT_AMANDA - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_AMANDA - config NF_NAT_PPTP tristate depends on NF_CONNTRACK && NF_NAT_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 4d8a4ad7816e..8baa496f6a4e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o # NAT helpers (nf_conntrack) -obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c deleted file mode 100644 index 42d337881171..000000000000 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ /dev/null @@ -1,85 +0,0 @@ -/* Amanda extension for TCP NAT alteration. - * (C) 2002 by Brian J. Murrell - * based on a copy of HW's ip_nat_irc.c as well as other modules - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_AUTHOR("Brian J. Murrell "); -MODULE_DESCRIPTION("Amanda NAT helper"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_amanda"); - -static unsigned int help(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) -{ - char buffer[sizeof("65535")]; - u_int16_t port; - unsigned int ret; - - /* Connection comes from client. */ - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = IP_CT_DIR_ORIGINAL; - - /* When you see the packet, we need to NAT it the same as the - * this one (ie. same IP: it will be TCP and master is UDP). */ - exp->expectfn = nf_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int res; - - exp->tuple.dst.u.tcp.port = htons(port); - res = nf_ct_expect_related(exp); - if (res == 0) - break; - else if (res != -EBUSY) { - port = 0; - break; - } - } - - if (port == 0) - return NF_DROP; - - sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, - buffer, strlen(buffer)); - if (ret != NF_ACCEPT) - nf_ct_unexpect_related(exp); - return ret; -} - -static void __exit nf_nat_amanda_fini(void) -{ - RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); - synchronize_rcu(); -} - -static int __init nf_nat_amanda_init(void) -{ - BUG_ON(nf_nat_amanda_hook != NULL); - RCU_INIT_POINTER(nf_nat_amanda_hook, help); - return 0; -} - -module_init(nf_nat_amanda_init); -module_exit(nf_nat_amanda_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 310449450634..2eee9f1f99ef 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -380,6 +380,11 @@ config NF_NAT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP select LIBCRC32C +config NF_NAT_AMANDA + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_AMANDA + config NF_NAT_FTP tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 16592b1e5344..7d6e1ea14c9b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # NAT helpers +obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o # transparent proxy support diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index e0212b5494b1..c514fe6033d2 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -155,8 +155,7 @@ static int amanda_help(struct sk_buff *skb, IPPROTO_TCP, NULL, &port); nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); - if (nf_nat_amanda && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, protoff, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c new file mode 100644 index 000000000000..42d337881171 --- /dev/null +++ b/net/netfilter/nf_nat_amanda.c @@ -0,0 +1,85 @@ +/* Amanda extension for TCP NAT alteration. + * (C) 2002 by Brian J. Murrell + * based on a copy of HW's ip_nat_irc.c as well as other modules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_AUTHOR("Brian J. Murrell "); +MODULE_DESCRIPTION("Amanda NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_amanda"); + +static unsigned int help(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) +{ + char buffer[sizeof("65535")]; + u_int16_t port; + unsigned int ret; + + /* Connection comes from client. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_ORIGINAL; + + /* When you see the packet, we need to NAT it the same as the + * this one (ie. same IP: it will be TCP and master is UDP). */ + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + int res; + + exp->tuple.dst.u.tcp.port = htons(port); + res = nf_ct_expect_related(exp); + if (res == 0) + break; + else if (res != -EBUSY) { + port = 0; + break; + } + } + + if (port == 0) + return NF_DROP; + + sprintf(buffer, "%u", port); + ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, + protoff, matchoff, matchlen, + buffer, strlen(buffer)); + if (ret != NF_ACCEPT) + nf_ct_unexpect_related(exp); + return ret; +} + +static void __exit nf_nat_amanda_fini(void) +{ + RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_amanda_init(void) +{ + BUG_ON(nf_nat_amanda_hook != NULL); + RCU_INIT_POINTER(nf_nat_amanda_hook, help); + return 0; +} + +module_init(nf_nat_amanda_init); +module_exit(nf_nat_amanda_fini); -- cgit v1.2.1 From 9a664821068739dbc8eac13770e28167b46a0c0f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:25 +0200 Subject: netfilter: nf_nat: support IPv6 in SIP NAT helper Add IPv6 support to the SIP NAT helper. There are no functional differences to IPv4 NAT, just different formats for addresses. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 5 - net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_sip.c | 580 ------------------------------------- net/netfilter/Kconfig | 5 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_sip.c | 68 ++--- net/netfilter/nf_nat_sip.c | 609 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 649 insertions(+), 620 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_nat_sip.c create mode 100644 net/netfilter/nf_nat_sip.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 52c4a87007aa..30197f8003be 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -242,11 +242,6 @@ config NF_NAT_H323 depends on NF_CONNTRACK && NF_NAT_IPV4 default NF_NAT_IPV4 && NF_CONNTRACK_H323 -config NF_NAT_SIP - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_SIP - # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 8baa496f6a4e..8914abffc96d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o -obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c deleted file mode 100644 index 47a47186a791..000000000000 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ /dev/null @@ -1,580 +0,0 @@ -/* SIP extension for NAT alteration. - * - * (C) 2005 by Christian Hentschel - * based on RR's ip_nat_ftp.c and other modules. - * (C) 2007 United Security Providers - * (C) 2007, 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Christian Hentschel "); -MODULE_DESCRIPTION("SIP NAT helper"); -MODULE_ALIAS("ip_nat_sip"); - - -static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int matchoff, unsigned int matchlen, - const char *buffer, unsigned int buflen) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct tcphdr *th; - unsigned int baseoff; - - if (nf_ct_protonum(ct) == IPPROTO_TCP) { - th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); - baseoff = ip_hdrlen(skb) + th->doff * 4; - matchoff += dataoff - baseoff; - - if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - protoff, matchoff, matchlen, - buffer, buflen, false)) - return 0; - } else { - baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); - matchoff += dataoff - baseoff; - - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - protoff, matchoff, matchlen, - buffer, buflen)) - return 0; - } - - /* Reload data pointer and adjust datalen value */ - *dptr = skb->data + dataoff; - *datalen += buflen - matchlen; - return 1; -} - -static int map_addr(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int matchoff, unsigned int matchlen, - union nf_inet_addr *addr, __be16 port) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - unsigned int buflen; - __be32 newaddr; - __be16 newport; - - if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip && - ct->tuplehash[dir].tuple.src.u.udp.port == port) { - newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip; - newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; - } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && - ct->tuplehash[dir].tuple.dst.u.udp.port == port) { - newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; - } else - return 1; - - if (newaddr == addr->ip && newport == port) - return 1; - - buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - - return mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen); -} - -static int map_sip_addr(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - enum sip_header_types type) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchlen, matchoff; - union nf_inet_addr addr; - __be16 port; - - if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, - &matchoff, &matchlen, &addr, &port) <= 0) - return 1; - return map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port); -} - -static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int coff, matchoff, matchlen; - enum sip_header_types hdr; - union nf_inet_addr addr; - __be16 port; - int request, in_header; - - /* Basic rules: requests and responses. */ - if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { - if (ct_sip_parse_request(ct, *dptr, *datalen, - &matchoff, &matchlen, - &addr, &port) > 0 && - !map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) - return NF_DROP; - request = 1; - } else - request = 0; - - if (nf_ct_protonum(ct) == IPPROTO_TCP) - hdr = SIP_HDR_VIA_TCP; - else - hdr = SIP_HDR_VIA_UDP; - - /* Translate topmost Via header and parameters */ - if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - hdr, NULL, &matchoff, &matchlen, - &addr, &port) > 0) { - unsigned int olen, matchend, poff, plen, buflen, n; - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - - /* We're only interested in headers related to this - * connection */ - if (request) { - if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip || - port != ct->tuplehash[dir].tuple.src.u.udp.port) - goto next; - } else { - if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip || - port != ct->tuplehash[dir].tuple.dst.u.udp.port) - goto next; - } - - olen = *datalen; - if (!map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) - return NF_DROP; - - matchend = matchoff + matchlen + *datalen - olen; - - /* The maddr= parameter (RFC 2361) specifies where to send - * the reply. */ - if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, - "maddr=", &poff, &plen, - &addr, true) > 0 && - addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && - addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { - buflen = sprintf(buffer, "%pI4", - &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) - return NF_DROP; - } - - /* The received= parameter (RFC 2361) contains the address - * from which the server received the request. */ - if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, - "received=", &poff, &plen, - &addr, false) > 0 && - addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && - addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - buflen = sprintf(buffer, "%pI4", - &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) - return NF_DROP; - } - - /* The rport= parameter (RFC 3581) contains the port number - * from which the server received the request. */ - if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, - "rport=", &poff, &plen, - &n) > 0 && - htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && - htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { - __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; - buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) - return NF_DROP; - } - } - -next: - /* Translate Contact headers */ - coff = 0; - in_header = 0; - while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, - SIP_HDR_CONTACT, &in_header, - &matchoff, &matchlen, - &addr, &port) > 0) { - if (!map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, - &addr, port)) - return NF_DROP; - } - - if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) - return NF_DROP; - - return NF_ACCEPT; -} - -static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - const struct tcphdr *th; - - if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) - return; - - th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); -} - -/* Handles expected signalling connections and media streams */ -static void ip_nat_sip_expected(struct nf_conn *ct, - struct nf_conntrack_expect *exp) -{ - struct nf_nat_range range; - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* For DST manip, map port here to where it's expected. */ - range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min_proto = range.max_proto = exp->saved_proto; - range.min_addr = range.max_addr = exp->saved_addr; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); - - /* Change src to where master sends to, but only if the connection - * actually came from the same source. */ - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == - ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_addr = range.max_addr - = ct->master->tuplehash[!exp->dir].tuple.dst.u3; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); - } -} - -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - __be32 newip; - u_int16_t port; - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - unsigned int buflen; - - /* Connection will come from reply */ - if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) - newip = exp->tuple.dst.u3.ip; - else - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; - - /* If the signalling port matches the connection's source port in the - * original direction, try to use the destination port in the opposite - * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) - port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); - else - port = ntohs(exp->tuple.dst.u.udp.port); - - exp->saved_addr = exp->tuple.dst.u3; - exp->tuple.dst.u3.ip = newip; - exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; - exp->dir = !dir; - exp->expectfn = ip_nat_sip_expected; - - for (; port != 0; port++) { - int ret; - - exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(exp); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - - if (port == 0) - return NF_DROP; - - if (exp->tuple.dst.u3.ip != exp->saved_addr.ip || - exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { - buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen)) - goto err; - } - return NF_ACCEPT; - -err: - nf_ct_unexpect_related(exp); - return NF_DROP; -} - -static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchoff, matchlen; - char buffer[sizeof("65536")]; - int buflen, c_len; - - /* Get actual SDP length */ - if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, - SDP_HDR_VERSION, SDP_HDR_UNSPEC, - &matchoff, &matchlen) <= 0) - return 0; - c_len = *datalen - matchoff + strlen("v="); - - /* Now, update SDP length */ - if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH, - &matchoff, &matchlen) <= 0) - return 0; - - buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen); -} - -static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - char *buffer, int buflen) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchlen, matchoff; - - if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, - &matchoff, &matchlen) <= 0) - return -ENOENT; - return mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; -} - -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) -{ - char buffer[sizeof("nnn.nnn.nnn.nnn")]; - unsigned int buflen; - - buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, - sdpoff, type, term, buffer, buflen)) - return 0; - - return mangle_content_len(skb, protoff, dataoff, dptr, datalen); -} - -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) -{ - char buffer[sizeof("nnnnn")]; - unsigned int buflen; - - buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen)) - return 0; - - return mangle_content_len(skb, protoff, dataoff, dptr, datalen); -} - -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) -{ - char buffer[sizeof("nnn.nnn.nnn.nnn")]; - unsigned int buflen; - - /* Mangle session description owner and contact addresses */ - buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, - SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, - buffer, buflen)) - return 0; - - switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, - buffer, buflen)) { - case 0: - /* - * RFC 2327: - * - * Session description - * - * c=* (connection information - not required if included in all media) - */ - case -ENOENT: - break; - default: - return 0; - } - - return mangle_content_len(skb, protoff, dataoff, dptr, datalen); -} - -/* So, this packet has hit the connection tracking matching code. - Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - u_int16_t port; - - /* Connection will come from reply */ - if (ct->tuplehash[dir].tuple.src.u3.ip == - ct->tuplehash[!dir].tuple.dst.u3.ip) - rtp_addr->ip = rtp_exp->tuple.dst.u3.ip; - else - rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; - - rtp_exp->saved_addr = rtp_exp->tuple.dst.u3; - rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; - rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; - rtp_exp->dir = !dir; - rtp_exp->expectfn = ip_nat_sip_expected; - - rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3; - rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; - rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; - rtcp_exp->dir = !dir; - rtcp_exp->expectfn = ip_nat_sip_expected; - - /* Try to get same pair of ports: if not, try to change them. */ - for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); - port != 0; port += 2) { - int ret; - - rtp_exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(rtp_exp); - if (ret == -EBUSY) - continue; - else if (ret < 0) { - port = 0; - break; - } - rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); - ret = nf_ct_expect_related(rtcp_exp); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nf_ct_unexpect_related(rtp_exp); - port = 0; - break; - } - } - - if (port == 0) - goto err1; - - /* Update media port. */ - if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, - mediaoff, medialen, port)) - goto err2; - - return NF_ACCEPT; - -err2: - nf_ct_unexpect_related(rtp_exp); - nf_ct_unexpect_related(rtcp_exp); -err1: - return NF_DROP; -} - -static struct nf_ct_helper_expectfn sip_nat = { - .name = "sip", - .expectfn = ip_nat_sip_expected, -}; - -static void __exit nf_nat_sip_fini(void) -{ - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); - nf_ct_helper_expectfn_unregister(&sip_nat); - synchronize_rcu(); -} - -static int __init nf_nat_sip_init(void) -{ - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); - nf_ct_helper_expectfn_register(&sip_nat); - return 0; -} - -module_init(nf_nat_sip_init); -module_exit(nf_nat_sip_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2eee9f1f99ef..bf3e4649efb2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -390,6 +390,11 @@ config NF_NAT_FTP depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP +config NF_NAT_SIP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_SIP + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7d6e1ea14c9b..7d6d1a035f31 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o +obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d5174902db37..df8f4f284481 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -57,7 +57,8 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; +void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, + s16 off) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, @@ -742,13 +743,18 @@ static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, * be tolerant and also accept records terminated with a single newline * character". We handle both cases. */ -static const struct sip_header ct_sdp_hdrs[] = { - [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), - [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), - [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), - [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), - [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), - [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), +static const struct sip_header ct_sdp_hdrs_v4[] = { + [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), + [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), +}; + +static const struct sip_header ct_sdp_hdrs_v6[] = { + [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), + [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; /* Linear string search within SDP header values */ @@ -774,11 +780,14 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, enum sdp_header_types term, unsigned int *matchoff, unsigned int *matchlen) { - const struct sip_header *hdr = &ct_sdp_hdrs[type]; - const struct sip_header *thdr = &ct_sdp_hdrs[term]; + const struct sip_header *hdrs, *hdr, *thdr; const char *start = dptr, *limit = dptr + datalen; int shift = 0; + hdrs = nf_ct_l3num(ct) == NFPROTO_IPV4 ? ct_sdp_hdrs_v4 : ct_sdp_hdrs_v6; + hdr = &hdrs[type]; + thdr = &hdrs[term]; + for (dptr += dataoff; dptr < limit; dptr++) { /* Find beginning of line */ if (*dptr != '\r' && *dptr != '\n') @@ -945,12 +954,12 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp->class != class) break; #ifdef CONFIG_NF_NAT_NEEDED - if (exp->tuple.src.l3num == AF_INET && !direct_rtp && - (exp->saved_addr.ip != exp->tuple.dst.u3.ip || + if (!direct_rtp && + (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && ct->status & IPS_NAT_MASK) { - daddr->ip = exp->saved_addr.ip; - tuple.dst.u3.ip = exp->saved_addr.ip; + *daddr = exp->saved_addr; + tuple.dst.u3 = exp->saved_addr; tuple.dst.u.udp.port = exp->saved_proto.udp.port; direct_rtp = 1; } else @@ -987,8 +996,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, IPPROTO_UDP, NULL, &rtcp_port); nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK && !direct_rtp) + if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); @@ -1044,15 +1052,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; unsigned int port; - enum sdp_header_types c_hdr; const struct sdp_media_type *t; int ret = NF_ACCEPT; typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); - c_hdr = nf_ct_l3num(ct) == AF_INET ? SDP_HDR_CONNECTION_IP4 : - SDP_HDR_CONNECTION_IP6; /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1066,7 +1071,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, * the end of the session description. */ caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, - c_hdr, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &matchoff, &matchlen, &caddr) > 0) caddr_len = matchlen; @@ -1096,7 +1101,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* The media description overrides the session description. */ maddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen, - c_hdr, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); @@ -1113,11 +1118,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, return ret; /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && - nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { + if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { ret = nf_nat_sdp_addr(skb, protoff, dataoff, - dptr, datalen, - mediaoff, c_hdr, SDP_HDR_MEDIA, + dptr, datalen, mediaoff, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) return ret; @@ -1127,8 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1293,8 +1296,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, exp, matchoff, matchlen); else { @@ -1476,8 +1478,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, else ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); - if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) { + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, dptr, datalen)) @@ -1560,11 +1561,10 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, datalen = datalen + diff - msglen; } - if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) { + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, tdiff); + nf_nat_sip_seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c new file mode 100644 index 000000000000..f4db3a7bd285 --- /dev/null +++ b/net/netfilter/nf_nat_sip.c @@ -0,0 +1,609 @@ +/* SIP extension for NAT alteration. + * + * (C) 2005 by Christian Hentschel + * based on RR's ip_nat_ftp.c and other modules. + * (C) 2007 United Security Providers + * (C) 2007, 2008, 2011, 2012 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Hentschel "); +MODULE_DESCRIPTION("SIP NAT helper"); +MODULE_ALIAS("ip_nat_sip"); + + +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int matchoff, unsigned int matchlen, + const char *buffer, unsigned int buflen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct tcphdr *th; + unsigned int baseoff; + + if (nf_ct_protonum(ct) == IPPROTO_TCP) { + th = (struct tcphdr *)(skb->data + protoff); + baseoff = protoff + th->doff * 4; + matchoff += dataoff - baseoff; + + if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + protoff, matchoff, matchlen, + buffer, buflen, false)) + return 0; + } else { + baseoff = protoff + sizeof(struct udphdr); + matchoff += dataoff - baseoff; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + protoff, matchoff, matchlen, + buffer, buflen)) + return 0; + } + + /* Reload data pointer and adjust datalen value */ + *dptr = skb->data + dataoff; + *datalen += buflen - matchlen; + return 1; +} + +static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer, + const union nf_inet_addr *addr, bool delim) +{ + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return sprintf(buffer, "%pI4", &addr->ip); + else { + if (delim) + return sprintf(buffer, "[%pI6c]", &addr->ip6); + else + return sprintf(buffer, "%pI6c", &addr->ip6); + } +} + +static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer, + const union nf_inet_addr *addr, u16 port) +{ + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return sprintf(buffer, "%pI4:%u", &addr->ip, port); + else + return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port); +} + +static int map_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int matchoff, unsigned int matchlen, + union nf_inet_addr *addr, __be16 port) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; + unsigned int buflen; + union nf_inet_addr newaddr; + __be16 newport; + + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, addr) && + ct->tuplehash[dir].tuple.src.u.udp.port == port) { + newaddr = ct->tuplehash[!dir].tuple.dst.u3; + newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; + } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && + ct->tuplehash[dir].tuple.dst.u.udp.port == port) { + newaddr = ct->tuplehash[!dir].tuple.src.u3; + newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + } else + return 1; + + if (nf_inet_addr_cmp(&newaddr, addr) && newport == port) + return 1; + + buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport)); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); +} + +static int map_sip_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + enum sip_header_types type) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchlen, matchoff; + union nf_inet_addr addr; + __be16 port; + + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, + &matchoff, &matchlen, &addr, &port) <= 0) + return 1; + return map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port); +} + +static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned int coff, matchoff, matchlen; + enum sip_header_types hdr; + union nf_inet_addr addr; + __be16 port; + int request, in_header; + + /* Basic rules: requests and responses. */ + if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { + if (ct_sip_parse_request(ct, *dptr, *datalen, + &matchoff, &matchlen, + &addr, &port) > 0 && + !map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) + return NF_DROP; + request = 1; + } else + request = 0; + + if (nf_ct_protonum(ct) == IPPROTO_TCP) + hdr = SIP_HDR_VIA_TCP; + else + hdr = SIP_HDR_VIA_UDP; + + /* Translate topmost Via header and parameters */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + hdr, NULL, &matchoff, &matchlen, + &addr, &port) > 0) { + unsigned int olen, matchend, poff, plen, buflen, n; + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; + + /* We're only interested in headers related to this + * connection */ + if (request) { + if (!nf_inet_addr_cmp(&addr, + &ct->tuplehash[dir].tuple.src.u3) || + port != ct->tuplehash[dir].tuple.src.u.udp.port) + goto next; + } else { + if (!nf_inet_addr_cmp(&addr, + &ct->tuplehash[dir].tuple.dst.u3) || + port != ct->tuplehash[dir].tuple.dst.u.udp.port) + goto next; + } + + olen = *datalen; + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) + return NF_DROP; + + matchend = matchoff + matchlen + *datalen - olen; + + /* The maddr= parameter (RFC 2361) specifies where to send + * the reply. */ + if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, + "maddr=", &poff, &plen, + &addr, true) > 0 && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) && + !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) { + buflen = sip_sprintf_addr(ct, buffer, + &ct->tuplehash[!dir].tuple.dst.u3, + true); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) + return NF_DROP; + } + + /* The received= parameter (RFC 2361) contains the address + * from which the server received the request. */ + if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, + "received=", &poff, &plen, + &addr, false) > 0 && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) && + !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) { + buflen = sip_sprintf_addr(ct, buffer, + &ct->tuplehash[!dir].tuple.src.u3, + false); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) + return NF_DROP; + } + + /* The rport= parameter (RFC 3581) contains the port number + * from which the server received the request. */ + if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, + "rport=", &poff, &plen, + &n) > 0 && + htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && + htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { + __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; + buflen = sprintf(buffer, "%u", ntohs(p)); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) + return NF_DROP; + } + } + +next: + /* Translate Contact headers */ + coff = 0; + in_header = 0; + while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, + SIP_HDR_CONTACT, &in_header, + &matchoff, &matchlen, + &addr, &port) > 0) { + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, + &addr, port)) + return NF_DROP; + } + + if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) + return NF_DROP; + + return NF_ACCEPT; +} + +static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, + s16 off) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) + return; + + th = (struct tcphdr *)(skb->data + protoff); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} + +/* Handles expected signalling connections and media streams */ +static void nf_nat_sip_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr = exp->saved_addr; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); + + /* Change src to where master sends to, but only if the connection + * actually came from the same source. */ + if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, + &ct->master->tuplehash[exp->dir].tuple.src.u3)) { + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + } +} + +static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + union nf_inet_addr newaddr; + u_int16_t port; + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; + unsigned int buflen; + + /* Connection will come from reply */ + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3)) + newaddr = exp->tuple.dst.u3; + else + newaddr = ct->tuplehash[!dir].tuple.dst.u3; + + /* If the signalling port matches the connection's source port in the + * original direction, try to use the destination port in the opposite + * direction. */ + if (exp->tuple.dst.u.udp.port == + ct->tuplehash[dir].tuple.src.u.udp.port) + port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); + else + port = ntohs(exp->tuple.dst.u.udp.port); + + exp->saved_addr = exp->tuple.dst.u3; + exp->tuple.dst.u3 = newaddr; + exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; + exp->dir = !dir; + exp->expectfn = nf_nat_sip_expected; + + for (; port != 0; port++) { + int ret; + + exp->tuple.dst.u.udp.port = htons(port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + port = 0; + break; + } + } + + if (port == 0) + return NF_DROP; + + if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || + exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { + buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) + goto err; + } + return NF_ACCEPT; + +err: + nf_ct_unexpect_related(exp); + return NF_DROP; +} + +static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchoff, matchlen; + char buffer[sizeof("65536")]; + int buflen, c_len; + + /* Get actual SDP length */ + if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, + SDP_HDR_VERSION, SDP_HDR_UNSPEC, + &matchoff, &matchlen) <= 0) + return 0; + c_len = *datalen - matchoff + strlen("v="); + + /* Now, update SDP length */ + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH, + &matchoff, &matchlen) <= 0) + return 0; + + buflen = sprintf(buffer, "%u", c_len); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); +} + +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, + enum sdp_header_types type, + enum sdp_header_types term, + char *buffer, int buflen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchlen, matchoff; + + if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, + &matchoff, &matchlen) <= 0) + return -ENOENT; + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; +} + +static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + char buffer[INET6_ADDRSTRLEN]; + unsigned int buflen; + + buflen = sip_sprintf_addr(ct, buffer, addr, false); + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, + sdpoff, type, term, buffer, buflen)) + return 0; + + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); +} + +static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port) +{ + char buffer[sizeof("nnnnn")]; + unsigned int buflen; + + buflen = sprintf(buffer, "%u", port); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) + return 0; + + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); +} + +static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, + const union nf_inet_addr *addr) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + char buffer[INET6_ADDRSTRLEN]; + unsigned int buflen; + + /* Mangle session description owner and contact addresses */ + buflen = sip_sprintf_addr(ct, buffer, addr, false); + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, + SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen)) + return 0; + + switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, + buffer, buflen)) { + case 0: + /* + * RFC 2327: + * + * Session description + * + * c=* (connection information - not required if included in all media) + */ + case -ENOENT: + break; + default: + return 0; + } + + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); +} + +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, + const char **dptr, unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + u_int16_t port; + + /* Connection will come from reply */ + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3)) + *rtp_addr = rtp_exp->tuple.dst.u3; + else + *rtp_addr = ct->tuplehash[!dir].tuple.dst.u3; + + rtp_exp->saved_addr = rtp_exp->tuple.dst.u3; + rtp_exp->tuple.dst.u3 = *rtp_addr; + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->dir = !dir; + rtp_exp->expectfn = nf_nat_sip_expected; + + rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3; + rtcp_exp->tuple.dst.u3 = *rtp_addr; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->dir = !dir; + rtcp_exp->expectfn = nf_nat_sip_expected; + + /* Try to get same pair of ports: if not, try to change them. */ + for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); + port != 0; port += 2) { + int ret; + + rtp_exp->tuple.dst.u.udp.port = htons(port); + ret = nf_ct_expect_related(rtp_exp); + if (ret == -EBUSY) + continue; + else if (ret < 0) { + port = 0; + break; + } + rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); + ret = nf_ct_expect_related(rtcp_exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + port = 0; + break; + } + } + + if (port == 0) + goto err1; + + /* Update media port. */ + if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && + !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + mediaoff, medialen, port)) + goto err2; + + return NF_ACCEPT; + +err2: + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); +err1: + return NF_DROP; +} + +static struct nf_ct_helper_expectfn sip_nat = { + .name = "sip", + .expectfn = nf_nat_sip_expected, +}; + +static void __exit nf_nat_sip_fini(void) +{ + RCU_INIT_POINTER(nf_nat_sip_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); + synchronize_rcu(); +} + +static int __init nf_nat_sip_init(void) +{ + BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); + BUG_ON(nf_nat_sip_expect_hook != NULL); + BUG_ON(nf_nat_sdp_addr_hook != NULL); + BUG_ON(nf_nat_sdp_port_hook != NULL); + BUG_ON(nf_nat_sdp_session_hook != NULL); + BUG_ON(nf_nat_sdp_media_hook != NULL); + RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + nf_ct_helper_expectfn_register(&sip_nat); + return 0; +} + +module_init(nf_nat_sip_init); +module_exit(nf_nat_sip_fini); -- cgit v1.2.1 From 5901b6be885e2c9a30fd94803b846b3d33e351dd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Aug 2012 19:14:27 +0200 Subject: netfilter: nf_nat: support IPv6 in IRC NAT helper Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 5 -- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_irc.c | 99 ---------------------------------------- net/netfilter/Kconfig | 5 ++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_irc.c | 3 +- net/netfilter/nf_nat_irc.c | 93 +++++++++++++++++++++++++++++++++++++ 7 files changed, 100 insertions(+), 107 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_nat_irc.c create mode 100644 net/netfilter/nf_nat_irc.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30197f8003be..843fe17db20d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -221,11 +221,6 @@ config NF_NAT_PROTO_GRE tristate depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE -config NF_NAT_IRC - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_IRC - config NF_NAT_TFTP tristate depends on NF_CONNTRACK && NF_NAT_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 8914abffc96d..17e649bb98ba 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o -obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c deleted file mode 100644 index 1ce37f89ec78..000000000000 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ /dev/null @@ -1,99 +0,0 @@ -/* IRC extension for TCP NAT alteration. - * - * (C) 2000-2001 by Harald Welte - * (C) 2004 Rusty Russell IBM Corporation - * based on a copy of RR's ip_nat_ftp.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("IRC (DCC) NAT helper"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_irc"); - -static unsigned int help(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) -{ - char buffer[sizeof("4294967296 65635")]; - u_int32_t ip; - u_int16_t port; - unsigned int ret; - - /* Reply comes from server. */ - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = IP_CT_DIR_REPLY; - exp->expectfn = nf_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - - if (port == 0) - return NF_DROP; - - ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); - sprintf(buffer, "%u %u", ip, port); - pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", - buffer, &ip, port); - - ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, buffer, - strlen(buffer)); - if (ret != NF_ACCEPT) - nf_ct_unexpect_related(exp); - return ret; -} - -static void __exit nf_nat_irc_fini(void) -{ - RCU_INIT_POINTER(nf_nat_irc_hook, NULL); - synchronize_rcu(); -} - -static int __init nf_nat_irc_init(void) -{ - BUG_ON(nf_nat_irc_hook != NULL); - RCU_INIT_POINTER(nf_nat_irc_hook, help); - return 0; -} - -/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) -{ - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); - return 0; -} -module_param_call(ports, warn_set, NULL, NULL, 0); - -module_init(nf_nat_irc_init); -module_exit(nf_nat_irc_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bf3e4649efb2..cabe4da0e191 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -390,6 +390,11 @@ config NF_NAT_FTP depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP +config NF_NAT_IRC + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_IRC + config NF_NAT_SIP tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7d6d1a035f31..0dd792972cae 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o +obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o # transparent proxy support diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 95d097cdb202..3b20aa77cfc8 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -205,8 +205,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, IPPROTO_TCP, NULL, &port); nf_nat_irc = rcu_dereference(nf_nat_irc_hook); - if (nf_nat_irc && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_irc && ct->status & IPS_NAT_MASK) ret = nf_nat_irc(skb, ctinfo, protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c new file mode 100644 index 000000000000..1fedee6e7fb6 --- /dev/null +++ b/net/netfilter/nf_nat_irc.c @@ -0,0 +1,93 @@ +/* IRC extension for TCP NAT alteration. + * + * (C) 2000-2001 by Harald Welte + * (C) 2004 Rusty Russell IBM Corporation + * based on a copy of RR's ip_nat_ftp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("IRC (DCC) NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_irc"); + +static unsigned int help(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) +{ + char buffer[sizeof("4294967296 65635")]; + u_int16_t port; + unsigned int ret; + + /* Reply comes from server. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_REPLY; + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + port = 0; + break; + } + } + + if (port == 0) + return NF_DROP; + + ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, + protoff, matchoff, matchlen, buffer, + strlen(buffer)); + if (ret != NF_ACCEPT) + nf_ct_unexpect_related(exp); + return ret; +} + +static void __exit nf_nat_irc_fini(void) +{ + RCU_INIT_POINTER(nf_nat_irc_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_irc_init(void) +{ + BUG_ON(nf_nat_irc_hook != NULL); + RCU_INIT_POINTER(nf_nat_irc_hook, help); + return 0; +} + +/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +static int warn_set(const char *val, struct kernel_param *kp) +{ + printk(KERN_INFO KBUILD_MODNAME + ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + return 0; +} +module_param_call(ports, warn_set, NULL, NULL, 0); + +module_init(nf_nat_irc_init); +module_exit(nf_nat_irc_fini); -- cgit v1.2.1 From 320ff567f299ed3f0a2d53906e632a1b0eda5599 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Aug 2012 19:14:29 +0200 Subject: netfilter: nf_nat: support IPv6 in TFTP NAT helper Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 5 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_tftp.c | 50 --------------------------------------- net/netfilter/Kconfig | 5 ++++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_tftp.c | 3 +-- net/netfilter/nf_nat_tftp.c | 50 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 57 insertions(+), 58 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_nat_tftp.c create mode 100644 net/netfilter/nf_nat_tftp.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 843fe17db20d..131e53702e77 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -221,11 +221,6 @@ config NF_NAT_PROTO_GRE tristate depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE -config NF_NAT_TFTP - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_TFTP - config NF_NAT_PPTP tristate depends on NF_CONNTRACK && NF_NAT_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 17e649bb98ba..b7dd18987237 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c deleted file mode 100644 index ccabbda71a3e..000000000000 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ /dev/null @@ -1,50 +0,0 @@ -/* (C) 2001-2002 Magnus Boden - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include -#include - -MODULE_AUTHOR("Magnus Boden "); -MODULE_DESCRIPTION("TFTP NAT helper"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_tftp"); - -static unsigned int help(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp) -{ - const struct nf_conn *ct = exp->master; - - exp->saved_proto.udp.port - = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - exp->dir = IP_CT_DIR_REPLY; - exp->expectfn = nf_nat_follow_master; - if (nf_ct_expect_related(exp) != 0) - return NF_DROP; - return NF_ACCEPT; -} - -static void __exit nf_nat_tftp_fini(void) -{ - RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); - synchronize_rcu(); -} - -static int __init nf_nat_tftp_init(void) -{ - BUG_ON(nf_nat_tftp_hook != NULL); - RCU_INIT_POINTER(nf_nat_tftp_hook, help); - return 0; -} - -module_init(nf_nat_tftp_init); -module_exit(nf_nat_tftp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index cabe4da0e191..052836e2490e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -400,6 +400,11 @@ config NF_NAT_SIP depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP +config NF_NAT_TFTP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_TFTP + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0dd792972cae..403ea8125884 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o +obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 9363e1c66466..81fc61c05263 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -72,8 +72,7 @@ static int tftp_help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); - if (nf_nat_tftp && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) + if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(skb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c new file mode 100644 index 000000000000..ccabbda71a3e --- /dev/null +++ b/net/netfilter/nf_nat_tftp.c @@ -0,0 +1,50 @@ +/* (C) 2001-2002 Magnus Boden + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include +#include + +MODULE_AUTHOR("Magnus Boden "); +MODULE_DESCRIPTION("TFTP NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_tftp"); + +static unsigned int help(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp) +{ + const struct nf_conn *ct = exp->master; + + exp->saved_proto.udp.port + = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + exp->dir = IP_CT_DIR_REPLY; + exp->expectfn = nf_nat_follow_master; + if (nf_ct_expect_related(exp) != 0) + return NF_DROP; + return NF_ACCEPT; +} + +static void __exit nf_nat_tftp_fini(void) +{ + RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_tftp_init(void) +{ + BUG_ON(nf_nat_tftp_hook != NULL); + RCU_INIT_POINTER(nf_nat_tftp_hook, help); + return 0; +} + +module_init(nf_nat_tftp_init); +module_exit(nf_nat_tftp_fini); -- cgit v1.2.1 From 8a91bb0c304b0853f8c59b1b48c7822c52362cba Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:31 +0200 Subject: netfilter: ip6tables: add stateless IPv6-to-IPv6 Network Prefix Translation target Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/Kconfig | 9 +++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NPT.c | 165 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 net/ipv6/netfilter/ip6t_NPT.c (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7bdf73be9a99..3b73254d7bf1 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -177,6 +177,15 @@ config IP6_NF_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NPT + tristate "NPT (Network Prefix translation) target support" + depends on NETFILTER_ADVANCED + help + This option adds the `SNPT' and `DNPT' target, which perform + stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0864ce601651..5752132ca159 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,5 +36,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o +obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c new file mode 100644 index 000000000000..e9486915eff6 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2011, 2012 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static __sum16 csum16_complement(__sum16 a) +{ + return (__force __sum16)(0xffff - (__force u16)a); +} + +static __sum16 csum16_add(__sum16 a, __sum16 b) +{ + u16 sum; + + sum = (__force u16)a + (__force u16)b; + sum += (__force u16)a < (__force u16)b; + return (__force __sum16)sum; +} + +static __sum16 csum16_sub(__sum16 a, __sum16 b) +{ + return csum16_add(a, csum16_complement(b)); +} + +static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) +{ + struct ip6t_npt_tginfo *npt = par->targinfo; + __sum16 src_sum = 0, dst_sum = 0; + unsigned int i; + + if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { + src_sum = csum16_add(src_sum, + (__force __sum16)npt->src_pfx.in6.s6_addr16[i]); + dst_sum = csum16_add(dst_sum, + (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]); + } + + npt->adjustment = csum16_sub(src_sum, dst_sum); + return 0; +} + +static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, + struct in6_addr *addr) +{ + unsigned int pfx_len; + unsigned int i, idx; + __be32 mask; + __sum16 sum; + + pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); + for (i = 0; i < pfx_len; i += 32) { + if (pfx_len - i >= 32) + mask = 0; + else + mask = htonl(~((1 << (pfx_len - i)) - 1)); + + idx = i / 32; + addr->s6_addr32[idx] &= mask; + addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + } + + if (pfx_len <= 48) + idx = 3; + else { + for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { + if ((__force __sum16)addr->s6_addr16[idx] != + CSUM_MANGLED_0) + break; + } + if (idx == ARRAY_SIZE(addr->s6_addr16)) + return false; + } + + sum = csum16_add((__force __sum16)addr->s6_addr16[idx], + npt->adjustment); + if (sum == CSUM_MANGLED_0) + sum = 0; + *(__force __sum16 *)&addr->s6_addr16[idx] = sum; + + return true; +} + +static unsigned int +ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, saddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static unsigned int +ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, daddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static struct xt_target ip6t_npt_target_reg[] __read_mostly = { + { + .name = "SNPT", + .target = ip6t_snpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "DNPT", + .target = ip6t_dnpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init ip6t_npt_init(void) +{ + return xt_register_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +static void __exit ip6t_npt_exit(void) +{ + xt_unregister_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +module_init(ip6t_npt_init); +module_exit(ip6t_npt_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS("ip6t_SNPT"); +MODULE_ALIAS("ip6t_DNPT"); -- cgit v1.2.1 From 0a54e939d8c2647c711ef2369c3e73c3b7f3028c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:11 +0000 Subject: ipvs: fix error return code Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 72bf32a84874..f51013c07b9f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1171,8 +1171,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!svc->stats.cpustats) + if (!svc->stats.cpustats) { + ret = -ENOMEM; goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); -- cgit v1.2.1 From ef6acf68c259d907517dcc0ffefcd4e30276ae29 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:16 +0000 Subject: netfilter: ctnetlink: fix error return code in init path Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da4fc37a8578..9807f3278fcb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2790,7 +2790,8 @@ static int __init ctnetlink_init(void) goto err_unreg_subsys; } - if (register_pernet_subsys(&ctnetlink_net_ops)) { + ret = register_pernet_subsys(&ctnetlink_net_ops); + if (ret < 0) { pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } -- cgit v1.2.1 From 6fc09f10f12477bdaa54e94f9cb428bef6d81315 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:17 +0000 Subject: netfilter: nfnetlink_log: fix error return code in init path Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 592091c1260b..14e2f3903142 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -996,8 +996,10 @@ static int __init nfnetlink_log_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) + proc_net_netfilter, &nful_file_ops)) { + status = -ENOMEM; goto cleanup_logger; + } #endif return status; -- cgit v1.2.1 From 6549dd43c04327071571edf7aea4465b16539422 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 23 Aug 2012 15:36:55 +0000 Subject: net: dev: fix the incorrect hold of net namespace's lo device When moving a net device from one net namespace to another net namespace,dev_change_net_namespace calls NETDEV_DOWN event,so the original net namespace's dst entries which beloned to this net device will be put into dst_garbage list. then dev_change_net_namespace will set this net device's net to the new net namespace. If we unregister this net device's driver, this will trigger the NETDEV_UNREGISTER_FINAL event, dst_ifdown will be called, and get this net device's dst entries from dst_garbage list, put these entries' dev to the new net namespace's lo device. It's not what we want,actually we need these dst entries hold the original net namespace's lo device,this incorrect device holding will trigger emg message like below. unregister_netdevice: waiting for lo to become free. Usage count = 1 so we should call NETDEV_UNREGISTER_FINAL event in dev_change_net_namespace too,in order to make sure dst entries already in the dst_garbage list, we need rcu_barrier before we call NETDEV_UNREGISTER_FINAL event. With help form Eric Dumazet. Signed-off-by: Gao feng Cc: Eric Dumazet Cc: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3401e2dab7cc..a5fc3e301cf2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6259,6 +6259,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char the device is just moving and can keep their slaves up. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + rcu_barrier(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* -- cgit v1.2.1 From 3f509c689a07a4aa989b426893d8491a7ffcc410 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 15:24:09 +0000 Subject: netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation We're hitting bug while trying to reinsert an already existing expectation: kernel BUG at kernel/timer.c:895! invalid opcode: 0000 [#1] SMP [...] Call Trace: [] nf_ct_expect_related_report+0x4a0/0x57a [nf_conntrack] [] ? in4_pton+0x72/0x131 [] ip_nat_sdp_media+0xeb/0x185 [nf_nat_sip] [] set_expected_rtp_rtcp+0x32d/0x39b [nf_conntrack_sip] [] process_sdp+0x30c/0x3ec [nf_conntrack_sip] [] ? irq_exit+0x9a/0x9c [] ? ip_nat_sdp_media+0x185/0x185 [nf_nat_sip] We have to remove the RTP expectation if the RTCP expectation hits EBUSY since we keep trying with other ports until we succeed. Reported-by: Rafal Fitt Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4ad9cf173992..9c87cde28ff8 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -502,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); port = 0; break; -- cgit v1.2.1 From 99469c32f79a32d8481f87be0d3c66dad286f4ec Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Fri, 24 Aug 2012 01:07:38 +0000 Subject: l2tp: avoid to use synchronize_rcu in tunnel free function Avoid to use synchronize_rcu in l2tp_tunnel_free because context may be atomic. Signed-off-by: Dmitry Kozlov Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 3 +-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 393355d37b47..513cab08a986 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) /* Remove from tunnel list */ spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); + kfree_rcu(tunnel, rcu); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - synchronize_rcu(); atomic_dec(&l2tp_tunnel_count); - kfree(tunnel); } /* Create a socket for the tunnel, if one isn't set up by diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a38ec6cdeee1..56d583e083a7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, -- cgit v1.2.1 From ee13040901c11b016d996ab5a946acaaa3461737 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Aug 2012 01:47:26 +0000 Subject: netpoll: provide an IP ident in UDP frames Let's fill IP header ident field with a meaningful value, it might help some setups. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 346b1eb83a1f..5af9c2692506 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -388,6 +388,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; + static atomic_t ip_ident; udp_len = len + sizeof(*udph); ip_len = udp_len + sizeof(*iph); @@ -423,7 +424,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) put_unaligned(0x45, (unsigned char *)iph); iph->tos = 0; put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = 0; + iph->id = htons(atomic_inc_return(&ip_ident)); iph->frag_off = 0; iph->ttl = 64; iph->protocol = IPPROTO_UDP; -- cgit v1.2.1 From acbb219d5f53821b2d0080d047800410c0420ea1 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 24 Aug 2012 07:38:35 +0000 Subject: net: ipv4: ipmr_expire_timer causes crash when removing net namespace When tearing down a net namespace, ipv4 mr_table structures are freed without first deactivating their timers. This can result in a crash in run_timer_softirq. This patch mimics the corresponding behaviour in ipv6. Locking and synchronization seem to be adequate. We are about to kfree mrt, so existing code should already make sure that no other references to mrt are pending or can be created by incoming traffic. The functions invoked here do not cause new references to mrt or other race conditions to be created. Invoking del_timer_sync guarantees that ipmr_expire_timer is inactive. Both ipmr_expire_process (whose completion we may have to wait in del_timer_sync) and mroute_clean_tables internally use mfc_unres_lock or other synchronizations when needed, and they both only modify mrt. Tested in Linux 3.4.8. Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..ebdf06f938bf 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static void ipmr_free_table(struct mr_table *mrt); + static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct mr_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - kfree(mrt); + ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - kfree(net->ipv4.mrt); + ipmr_free_table(net->ipv4.mrt); } #endif @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; } +static void ipmr_free_table(struct mr_table *mrt) +{ + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -- cgit v1.2.1 From c5ae7d41927dbd208c885d4794e30617ad6cdf12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Aug 2012 12:33:07 +0000 Subject: ipv4: must use rcu protection while calling fib_lookup Following lockdep splat was reported by Pavel Roskin : [ 1570.586223] =============================== [ 1570.586225] [ INFO: suspicious RCU usage. ] [ 1570.586228] 3.6.0-rc3-wl-main #98 Not tainted [ 1570.586229] ------------------------------- [ 1570.586231] /home/proski/src/linux/net/ipv4/route.c:645 suspicious rcu_dereference_check() usage! [ 1570.586233] [ 1570.586233] other info that might help us debug this: [ 1570.586233] [ 1570.586236] [ 1570.586236] rcu_scheduler_active = 1, debug_locks = 0 [ 1570.586238] 2 locks held by Chrome_IOThread/4467: [ 1570.586240] #0: (slock-AF_INET){+.-...}, at: [] release_sock+0x2c/0xa0 [ 1570.586253] #1: (fnhe_lock){+.-...}, at: [] update_or_create_fnhe+0x2c/0x270 [ 1570.586260] [ 1570.586260] stack backtrace: [ 1570.586263] Pid: 4467, comm: Chrome_IOThread Not tainted 3.6.0-rc3-wl-main #98 [ 1570.586265] Call Trace: [ 1570.586271] [] lockdep_rcu_suspicious+0xfd/0x130 [ 1570.586275] [] update_or_create_fnhe+0x15c/0x270 [ 1570.586278] [] __ip_rt_update_pmtu+0x73/0xb0 [ 1570.586282] [] ip_rt_update_pmtu+0x29/0x90 [ 1570.586285] [] inet_csk_update_pmtu+0x2c/0x80 [ 1570.586290] [] tcp_v4_mtu_reduced+0x2e/0xc0 [ 1570.586293] [] tcp_release_cb+0xa4/0xb0 [ 1570.586296] [] release_sock+0x55/0xa0 [ 1570.586300] [] tcp_sendmsg+0x4af/0xf50 [ 1570.586305] [] inet_sendmsg+0x120/0x230 [ 1570.586308] [] ? inet_sk_rebuild_header+0x40/0x40 [ 1570.586312] [] ? sock_update_classid+0xbd/0x3b0 [ 1570.586315] [] ? sock_update_classid+0x130/0x3b0 [ 1570.586320] [] do_sock_write+0xc5/0xe0 [ 1570.586323] [] sock_aio_write+0x53/0x80 [ 1570.586328] [] do_sync_write+0xa3/0xe0 [ 1570.586332] [] vfs_write+0x165/0x180 [ 1570.586335] [] sys_write+0x45/0x90 [ 1570.586340] [] system_call_fastpath+0x16/0x1b Signed-off-by: Eric Dumazet Reported-by: Pavel Roskin Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 24fd4c596643..82cf2a722b23 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -934,12 +934,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } + rcu_read_unlock(); return mtu; } -- cgit v1.2.1 From 5b423f6a40a0327f9d40bc8b97ce9be266f74368 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 16:25:49 +0000 Subject: netfilter: nf_conntrack: fix racy timer handling with reliable events Existing code assumes that del_timer returns true for alive conntrack entries. However, this is not true if reliable events are enabled. In that case, del_timer may return true for entries that were just inserted in the dying list. Note that packets / ctnetlink may hold references to conntrack entries that were just inserted to such list. This patch fixes the issue by adding an independent timer for event delivery. This increases the size of the ecache extension. Still we can revisit this later and use variable size extensions to allocate this area on demand. Tested-by: Oliver Smith Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf4875565d67..2ceec64b19f9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -249,12 +249,15 @@ static void death_by_event(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ - ct->timeout.expires = jiffies + + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); return; } /* we've got the event delivered, now it's dying */ @@ -268,6 +271,9 @@ static void death_by_event(unsigned long ul_conntrack) void nf_ct_insert_dying_list(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); /* add this conntrack to the dying list */ spin_lock_bh(&nf_conntrack_lock); @@ -275,10 +281,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); /* set a new timer to retry event delivery */ - setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); - ct->timeout.expires = jiffies + + setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); -- cgit v1.2.1 From 46b66d7077b89fb4917ceef19b3f7dd86055c94a Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sun, 26 Aug 2012 16:37:07 +0000 Subject: decnet: fix shutdown parameter checking The allowed value of "how" is SHUT_RD/SHUT_WR/SHUT_RDWR (0/1/2), rather than SHUTDOWN_MASK (3). Signed-off-by: Xi Wang Acked-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2ba1a2814c24..307c322d53bb 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1313,10 +1313,10 @@ static int dn_shutdown(struct socket *sock, int how) if (scp->state == DN_O) goto out; - if (how != SHUTDOWN_MASK) + if (how != SHUT_RDWR) goto out; - sk->sk_shutdown = how; + sk->sk_shutdown = SHUTDOWN_MASK; dn_destroy_sock(sk); err = 0; -- cgit v1.2.1 From fc61b928dc4d72176cf4bd4d30bf1d22e599aefc Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sun, 26 Aug 2012 16:47:13 +0000 Subject: af_unix: fix shutdown parameter checking Return -EINVAL rather than 0 given an invalid "mode" parameter. Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- net/unix/af_unix.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c5ee4ff61364..8a84ab64cafd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2060,10 +2060,14 @@ static int unix_shutdown(struct socket *sock, int mode) struct sock *sk = sock->sk; struct sock *other; - mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); - - if (!mode) - return 0; + if (mode < SHUT_RD || mode > SHUT_RDWR) + return -EINVAL; + /* This maps: + * SHUT_RD (0) -> RCV_SHUTDOWN (1) + * SHUT_WR (1) -> SEND_SHUTDOWN (2) + * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) + */ + ++mode; unix_state_lock(sk); sk->sk_shutdown |= mode; -- cgit v1.2.1 From 80f0fd8a7f18af78dd26bf94230e89c3ae82d4ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 26 Aug 2012 18:20:45 +0000 Subject: openvswitch: using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b7f38b161909..c7bf2f26525a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -427,19 +427,11 @@ void ovs_flow_deferred_free(struct sw_flow *flow) call_rcu(&flow->rcu, rcu_free_flow_callback); } -/* RCU callback used by ovs_flow_deferred_free_acts. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. */ void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) { - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); + kfree_rcu(sf_acts, rcu); } static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) -- cgit v1.2.1 From 4a2c240691bfdb9b08e680fb8af3f8c011c60e02 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Sun, 26 Aug 2012 21:23:13 +0000 Subject: net:atm:fix up ENOIOCTLCMD error handling At commit 07d106d0, Linus pointed out that ENOIOCTLCMD should be translated as ENOTTY to user mode. Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Wanlong Gao Signed-off-by: David S. Miller --- net/atm/resources.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index 23f45ce6f351..0447d5d0b639 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -432,7 +432,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) size = dev->ops->ioctl(dev, cmd, buf); } if (size < 0) { - error = (size == -ENOIOCTLCMD ? -EINVAL : size); + error = (size == -ENOIOCTLCMD ? -ENOTTY : size); goto done; } } -- cgit v1.2.1 From 98d75c3724f9b37cdfdb85f821e10edcb743959e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 27 Aug 2012 06:30:01 +0000 Subject: ipv4: Minor logic clean-up in ipv4_mtu In ipv4_mtu there is some logic where we are testing for a non-zero value and a timer expiration, then setting the value to zero, and then testing if the value is zero we set it to a value based on the dst. Instead of bothering with the extra steps it is easier to just cleanup the logic so that we set it to the dst based value if it is zero or if the timer has expired. Signed-off-by: Alexander Duyck --- net/ipv4/route.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d2d1e153e7c4..dc9549b5eb1c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1134,10 +1134,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) const struct rtable *rt = (const struct rtable *) dst; unsigned int mtu = rt->rt_pmtu; - if (mtu && time_after_eq(jiffies, rt->dst.expires)) - mtu = 0; - - if (!mtu) + if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu && rt_is_output_route(rt)) -- cgit v1.2.1 From d1a53dfd114a6c53464de116cdab88d934bfe92f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 27 Aug 2012 23:39:24 +0000 Subject: net: fix documentation of skb_needs_linearize(). skb_needs_linearize() does not check highmem DMA as it does not call illegal_highdma() anymore, so there is no need to mention highmem DMA here. (Indeed, ~NETIF_F_SG flag, which is checked in skb_needs_linearize(), can be set when illegal_highdma() returns true, and we are assured that illegal_highdma() is invoked prior to skb_needs_linearize() as skb_needs_linearize() is a static method called only once. But ~NETIF_F_SG can be set not only there in this same invocation path. It can also be set when can_checksum_protocol() returns false). see commit 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853, Convert skb_need_linearize() to use precomputed features. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/dev.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a5fc3e301cf2..b1e6d6385516 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2184,9 +2184,7 @@ EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG, or if - * at least one of fragments is in highmem and device does not - * support DMA from it. + * 2. skb is fragmented and the device does not support SG. */ static inline int skb_needs_linearize(struct sk_buff *skb, int features) -- cgit v1.2.1 From 48f125ce1cc3ff275f9587b5bf56bf0f90766c7d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:12 +0000 Subject: net: ipv6: fix error return code Initialize return variable before exiting on an error path. The initial initialization of the return variable is also dropped, because that value is never used. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6dc7fd353ef5..282f3723ee19 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - err = -ENOMEM; - aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); -- cgit v1.2.1 From 599901c3e4204e9d9c5a24df5402cd91617a2a26 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:15 +0000 Subject: net/xfrm/xfrm_state.c: fix error return code Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 87cd0e4d4282..210be48d8ae3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1994,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) goto error; x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) + if (x->outer_mode == NULL) { + err = -EPROTONOSUPPORT; goto error; + } if (init_replay) { err = xfrm_init_replay(x); -- cgit v1.2.1 From eb7e0575966f9c84434e92c8a3f69719cc2e7571 Mon Sep 17 00:00:00 2001 From: Sorin Dumitru Date: Thu, 30 Aug 2012 02:01:45 +0000 Subject: ipv6: remove some deadcode __ipv6_regen_rndid no longer returns anything other than 0 so there's no point in verifying what it returns Signed-off-by: Sorin Dumitru Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..055627f6df22 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) #endif #ifdef CONFIG_IPV6_PRIVACY -static int __ipv6_regen_rndid(struct inet6_dev *idev); -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +static void __ipv6_regen_rndid(struct inet6_dev *idev); +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); #endif @@ -852,16 +852,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { - spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); - pr_warn("%s: regeneration of randomized interface id failed\n", - __func__); - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; - } + __ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -1600,7 +1591,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) #ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static int __ipv6_regen_rndid(struct inet6_dev *idev) +static void __ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -1627,8 +1618,6 @@ regen: if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) goto regen; } - - return 0; } static void ipv6_regen_rndid(unsigned long data) @@ -1642,8 +1631,7 @@ static void ipv6_regen_rndid(unsigned long data) if (idev->dead) goto out; - if (__ipv6_regen_rndid(idev) < 0) - goto out; + __ipv6_regen_rndid(idev); expires = jiffies + idev->cnf.temp_prefered_lft * HZ - @@ -1664,13 +1652,10 @@ out: in6_dev_put(idev); } -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { - int ret = 0; - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ret = __ipv6_regen_rndid(idev); - return ret; + __ipv6_regen_rndid(idev); } #endif -- cgit v1.2.1 From 1046716368979dee857a2b8a91c4a8833f21b9cb Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:11 +0000 Subject: tcp: TCP Fast Open Server - header & support functions This patch adds all the necessary data structure and support functions to implement TFO server side. It also documents a number of flags for the sysctl_tcp_fastopen knob, and adds a few Linux extension MIBs. In addition, it includes the following: 1. a new TCP_FASTOPEN socket option an application must call to supply a max backlog allowed in order to enable TFO on its listener. 2. A number of key data structures: "fastopen_rsk" in tcp_sock - for a big socket to access its request_sock for retransmission and ack processing purpose. It is non-NULL iff 3WHS not completed. "fastopenq" in request_sock_queue - points to a per Fast Open listener data structure "fastopen_queue" to keep track of qlen (# of outstanding Fast Open requests) and max_qlen, among other things. "listener" in tcp_request_sock - to point to the original listener for book-keeping purpose, i.e., to maintain qlen against max_qlen as part of defense against IP spoofing attack. 3. various data structure and functions, many in tcp_fastopen.c, to support server side Fast Open cookie operations, including /proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/proc.c | 4 +++ net/ipv4/sysctl_net_ipv4.c | 45 +++++++++++++++++++++++++ net/ipv4/tcp_fastopen.c | 83 +++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_input.c | 4 +-- 4 files changed, 133 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250b..8de53e1ddd54 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), + SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), + SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), + SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), + SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e78c79b5586..9205e492dc9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } +int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct tcp_fastopen_context *ctxt; + int ret; + u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ + + tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); + if (!tbl.data) + return -ENOMEM; + + rcu_read_lock(); + ctxt = rcu_dereference(tcp_fastopen_ctx); + if (ctxt) + memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + rcu_read_unlock(); + + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", + user_key[0], user_key[1], user_key[2], user_key[3]); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (write && ret == 0) { + if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, + user_key + 2, user_key + 3) != 4) { + ret = -EINVAL; + goto bad_key; + } + tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + } + +bad_key: + pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", + user_key[0], user_key[1], user_key[2], user_key[3], + (char *)tbl.data, ret); + kfree(tbl.data); + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -385,6 +424,12 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, { .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d7..8f7ef0ad80e5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,10 +1,91 @@ +#include #include #include +#include +#include +#include +#include +#include +#include -int sysctl_tcp_fastopen; +int sysctl_tcp_fastopen __read_mostly; + +struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + +static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); + +static void tcp_fastopen_ctx_free(struct rcu_head *head) +{ + struct tcp_fastopen_context *ctx = + container_of(head, struct tcp_fastopen_context, rcu); + crypto_free_cipher(ctx->tfm); + kfree(ctx); +} + +int tcp_fastopen_reset_cipher(void *key, unsigned int len) +{ + int err; + struct tcp_fastopen_context *ctx, *octx; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->tfm = crypto_alloc_cipher("aes", 0, 0); + + if (IS_ERR(ctx->tfm)) { + err = PTR_ERR(ctx->tfm); +error: kfree(ctx); + pr_err("TCP: TFO aes cipher alloc error: %d\n", err); + return err; + } + err = crypto_cipher_setkey(ctx->tfm, key, len); + if (err) { + pr_err("TCP: TFO cipher key error: %d\n", err); + crypto_free_cipher(ctx->tfm); + goto error; + } + memcpy(ctx->key, key, len); + + spin_lock(&tcp_fastopen_ctx_lock); + + octx = rcu_dereference_protected(tcp_fastopen_ctx, + lockdep_is_held(&tcp_fastopen_ctx_lock)); + rcu_assign_pointer(tcp_fastopen_ctx, ctx); + spin_unlock(&tcp_fastopen_ctx_lock); + + if (octx) + call_rcu(&octx->rcu, tcp_fastopen_ctx_free); + return err; +} + +/* Computes the fastopen cookie for the peer. + * The peer address is a 128 bits long (pad with zeros for IPv4). + * + * The caller must check foc->len to determine if a valid cookie + * has been generated successfully. +*/ +void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) +{ + __be32 peer_addr[4] = { addr, 0, 0, 0 }; + struct tcp_fastopen_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(tcp_fastopen_ctx); + if (ctx) { + crypto_cipher_encrypt_one(ctx->tfm, + foc->val, + (__u8 *)peer_addr); + foc->len = TCP_FASTOPEN_COOKIE_SIZE; + } + rcu_read_unlock(); +} static int __init tcp_fastopen_init(void) { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(key, sizeof(key)); return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce4ffe9ed556..d47d5fe8f3f0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -378,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* 4. Try to fixup all. It is made immediately after connection enters * established state. */ -static void tcp_init_buffer_space(struct sock *sk) +void tcp_init_buffer_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -4038,7 +4038,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) } /* When we get a reset we do this. */ -static void tcp_reset(struct sock *sk) +void tcp_reset(struct sock *sk) { /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { -- cgit v1.2.1 From 8336886f786fdacbc19b719c1f7ea91eb70706d4 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:12 +0000 Subject: tcp: TCP Fast Open Server - support TFO listeners This patch builds on top of the previous patch to add the support for TFO listeners. This includes - 1. allocating, properly initializing, and managing the per listener fastopen_queue structure when TFO is enabled 2. changes to the inet_csk_accept code to support TFO. E.g., the request_sock can no longer be freed upon accept(), not until 3WHS finishes 3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg() if it's a TFO socket 4. properly closing a TFO listener, and a TFO socket before 3WHS finishes 5. supporting TCP_FASTOPEN socket option 6. modifying tcp_check_req() to use to check a TFO socket as well as request_sock 7. supporting TCP's TFO cookie option 8. adding a new SYN-ACK retransmit handler to use the timer directly off the TFO socket rather than the listener socket. Note that TFO server side will not retransmit anything other than SYN-ACK until the 3WHS is completed. The patch also contains an important function "reqsk_fastopen_remove()" to manage the somewhat complex relation between a listener, its request_sock, and the corresponding child socket. See the comment above the function for the detail. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/request_sock.c | 95 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 28 +++++++++++- net/ipv4/inet_connection_sock.c | 57 ++++++++++++++++++++++--- net/ipv4/syncookies.c | 1 + net/ipv4/tcp.c | 49 ++++++++++++++++++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_minisocks.c | 61 +++++++++++++++++++++----- net/ipv4/tcp_output.c | 21 ++++++--- net/ipv4/tcp_timer.c | 39 ++++++++++++++++- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 5 ++- 11 files changed, 326 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 9b570a6a33c5..c31d9e8668c3 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) kfree(lopt); } +/* + * This function is called to set a Fast Open socket's "fastopen_rsk" field + * to NULL when a TFO socket no longer needs to access the request_sock. + * This happens only after 3WHS has been either completed or aborted (e.g., + * RST is received). + * + * Before TFO, a child socket is created only after 3WHS is completed, + * hence it never needs to access the request_sock. things get a lot more + * complex with TFO. A child socket, accepted or not, has to access its + * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts, + * until 3WHS is either completed or aborted. Afterwards the req will stay + * until either the child socket is accepted, or in the rare case when the + * listener is closed before the child is accepted. + * + * In short, a request socket is only freed after BOTH 3WHS has completed + * (or aborted) and the child socket has been accepted (or listener closed). + * When a child socket is accepted, its corresponding req->sk is set to + * NULL since it's no longer needed. More importantly, "req->sk == NULL" + * will be used by the code below to determine if a child socket has been + * accepted or not, and the check is protected by the fastopenq->lock + * described below. + * + * Note that fastopen_rsk is only accessed from the child socket's context + * with its socket lock held. But a request_sock (req) can be accessed by + * both its child socket through fastopen_rsk, and a listener socket through + * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin + * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created. + * only in the rare case when both the listener and the child locks are held, + * e.g., in inet_csk_listen_stop() do we not need to acquire the lock. + * The lock also protects other fields such as fastopenq->qlen, which is + * decremented by this function when fastopen_rsk is no longer needed. + * + * Note that another solution was to simply use the existing socket lock + * from the listener. But first socket lock is difficult to use. It is not + * a simple spin lock - one must consider sock_owned_by_user() and arrange + * to use sk_add_backlog() stuff. But what really makes it infeasible is the + * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to + * acquire a child's lock while holding listener's socket lock. A corner + * case might also exist in tcp_v4_hnd_req() that will trigger this locking + * order. + * + * When a TFO req is created, it needs to sock_hold its listener to prevent + * the latter data structure from going away. + * + * This function also sets "treq->listener" to NULL and unreference listener + * socket. treq->listener is used by the listener so it is protected by the + * fastopenq->lock in this function. + */ +void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, + bool reset) +{ + struct sock *lsk = tcp_rsk(req)->listener; + struct fastopen_queue *fastopenq = + inet_csk(lsk)->icsk_accept_queue.fastopenq; + + BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk)); + + tcp_sk(sk)->fastopen_rsk = NULL; + spin_lock_bh(&fastopenq->lock); + fastopenq->qlen--; + tcp_rsk(req)->listener = NULL; + if (req->sk) /* the child socket hasn't been accepted yet */ + goto out; + + if (!reset || lsk->sk_state != TCP_LISTEN) { + /* If the listener has been closed don't bother with the + * special RST handling below. + */ + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + reqsk_free(req); + return; + } + /* Wait for 60secs before removing a req that has triggered RST. + * This is a simple defense against TFO spoofing attack - by + * counting the req against fastopen.max_qlen, and disabling + * TFO when the qlen exceeds max_qlen. + * + * For more details see CoNext'11 "TCP Fast Open" paper. + */ + req->expires = jiffies + 60*HZ; + if (fastopenq->rskq_rst_head == NULL) + fastopenq->rskq_rst_head = req; + else + fastopenq->rskq_rst_tail->dl_next = req; + + req->dl_next = NULL; + fastopenq->rskq_rst_tail = req; + fastopenq->qlen++; +out: + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + return; +} diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6681ccf5c3ee..4f70ef0b946d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -149,6 +149,11 @@ void inet_sock_destruct(struct sock *sk) pr_err("Attempt to release alive inet socket %p\n", sk); return; } + if (sk->sk_type == SOCK_STREAM) { + struct fastopen_queue *fastopenq = + inet_csk(sk)->icsk_accept_queue.fastopenq; + kfree(fastopenq); + } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); @@ -212,6 +217,26 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { + /* Check special setups for testing purpose to enable TFO w/o + * requiring TCP_FASTOPEN sockopt. + * Note that only TCP sockets (SOCK_STREAM) will reach here. + * Also fastopenq may already been allocated because this + * socket was in TCP_LISTEN state previously but was + * shutdown() (rather than close()). + */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) + err = fastopen_init_queue(sk, backlog); + else if ((sysctl_tcp_fastopen & + TFO_SERVER_WO_SOCKOPT2) != 0) + err = fastopen_init_queue(sk, + ((uint)sysctl_tcp_fastopen) >> 16); + else + err = 0; + if (err) + goto out; + } err = inet_csk_listen_start(sk, backlog); if (err) goto out; @@ -701,7 +726,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7f75f21d7b83..8464b79c493f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct sock *newsk; + struct request_sock *req; int error; lock_sock(sk); @@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { + if (reqsk_queue_empty(queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) if (error) goto out_err; } - - newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - WARN_ON(newsk->sk_state == TCP_SYN_RECV); + req = reqsk_queue_remove(queue); + newsk = req->sk; + + sk_acceptq_removed(sk); + if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) { + spin_lock_bh(&queue->fastopenq->lock); + if (tcp_rsk(req)->listener) { + /* We are still waiting for the final ACK from 3WHS + * so can't free req now. Instead, we set req->sk to + * NULL to signify that the child socket is taken + * so reqsk_fastopen_remove() will free the req + * when 3WHS finishes (or is aborted). + */ + req->sk = NULL; + req = NULL; + } + spin_unlock_bh(&queue->fastopenq->lock); + } out: release_sock(sk); + if (req) + __reqsk_free(req); return newsk; out_err: newsk = NULL; + req = NULL; *err = error; goto out; } @@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct request_sock *acc_req; struct request_sock *req; inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); + acc_req = reqsk_queue_yank_acceptq(queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk) * To be honest, we are not able to make either * of the variants now. --ANK */ - reqsk_queue_destroy(&icsk->icsk_accept_queue); + reqsk_queue_destroy(queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); + if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) { + BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(sk != tcp_rsk(req)->listener); + + /* Paranoid, to prevent race condition if + * an inbound pkt destined for child is + * blocked by sock lock in tcp_v4_rcv(). + * Also to satisfy an assertion in + * tcp_v4_destroy_sock(). + */ + tcp_sk(child)->fastopen_rsk = NULL; + sock_put(sk); + } inet_csk_destroy_sock(child); bh_unlock_sock(child); @@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } + if (queue->fastopenq != NULL) { + /* Free all the reqs queued in rskq_rst_head. */ + spin_lock_bh(&queue->fastopenq->lock); + acc_req = queue->fastopenq->rskq_rst_head; + queue->fastopenq->rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq->lock); + while ((req = acc_req) != NULL) { + acc_req = req->dl_next; + __reqsk_free(req); + } + } WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650e1528e1e6..ba48e799b031 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->listener = NULL; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1daf..df83d744e380 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLIN | POLLRDNORM | POLLRDHUP; - /* Connected? */ - if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + /* Connected or passive Fast Open socket? */ + if (sk->sk_state != TCP_SYN_SENT && + (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; + } clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto do_error; + } if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { @@ -2144,6 +2155,10 @@ void tcp_close(struct sock *sk, long timeout) * they look as CLOSING or LAST_ACK for Linux) * Probably, I missed some more holelets. * --ANK + * XXX (TFO) - To start off we don't support SYN+ACK+FIN + * in a single packet! (May consider it later but will + * probably need API support or TCP_CORK SYN-ACK until + * data is written and socket is closed.) */ tcp_send_fin(sk); } @@ -2215,8 +2230,16 @@ adjudge_to_death: } } - if (sk->sk_state == TCP_CLOSE) + if (sk->sk_state == TCP_CLOSE) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + /* We could get here with a non-NULL req if the socket is + * aborted (e.g., closed with unread data) before 3WHS + * finishes. + */ + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); + } /* Otherwise, socket is reprieved until protocol close. */ out: @@ -2688,6 +2711,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else icsk->icsk_user_timeout = msecs_to_jiffies(val); break; + + case TCP_FASTOPEN: + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | + TCPF_LISTEN))) + err = fastopen_init_queue(sk, val); + else + err = -EINVAL; + break; default: err = -ENOPROTOOPT; break; @@ -3501,11 +3532,15 @@ EXPORT_SYMBOL(tcp_cookie_generator); void tcp_done(struct sock *sk) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); sk->sk_shutdown = SHUTDOWN_MASK; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 36f02f954ac1..bb148dee1edd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -839,7 +839,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -1554,7 +1554,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6ff7f10dce9d..e965319d610b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -507,6 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); + newtp->fastopen_rsk = NULL; TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } @@ -515,13 +516,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, EXPORT_SYMBOL(tcp_create_openreq_child); /* - * Process an incoming packet for SYN_RECV sockets represented - * as a request_sock. + * Process an incoming packet for SYN_RECV sockets represented as a + * request_sock. Normally sk is the listener socket but for TFO it + * points to the child socket. + * + * XXX (TFO) - The current impl contains a special check for ack + * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev) + struct request_sock **prev, + bool fastopen) { struct tcp_options_received tmp_opt; const u8 *hash_location; @@ -530,6 +536,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; + BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); @@ -565,6 +573,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. + * + * Note that even if there is new data in the SYN packet + * they will be thrown away too. */ req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; @@ -622,9 +633,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * - * Invalid ACK: reset will be sent by listening socket + * Invalid ACK: reset will be sent by listening socket. + * Note that the ACK validity check for a Fast Open socket is done + * elsewhere and is checked directly against the child socket rather + * than req because user data may have been sent out. */ - if ((flg & TCP_FLAG_ACK) && + if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; @@ -637,7 +651,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { + tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_ack(sk, skb, req); @@ -648,7 +662,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) + if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) req->ts_recent = tmp_opt.rcv_tsval; if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { @@ -667,10 +681,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. + * + * XXX (TFO) - if we ever allow "data after SYN", the + * following check needs to be removed. */ if (!(flg & TCP_FLAG_ACK)) return NULL; + /* For Fast Open no more processing is needed (sk is the + * child socket). + */ + if (fastopen) + return sk; + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { @@ -706,11 +729,21 @@ listen_overflow: } embryonic_reset: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST)) { + /* Received a bad SYN pkt - for TFO We try not to reset + * the local connection unless it's really necessary to + * avoid becoming vulnerable to outside attack aiming at + * resetting legit local connections. + */ req->rsk_ops->send_reset(sk, skb); - - inet_csk_reqsk_queue_drop(sk, req, prev); + } else if (fastopen) { /* received a valid RST pkt */ + reqsk_fastopen_remove(sk, req, true); + tcp_reset(sk); + } + if (!fastopen) { + inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + } return NULL; } EXPORT_SYMBOL(tcp_check_req); @@ -719,6 +752,12 @@ EXPORT_SYMBOL(tcp_check_req); * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. + * + * For the vast majority of cases child->sk_state will be TCP_SYN_RECV + * when entering. But other states are possible due to a race condition + * where after __inet_lookup_established() fails but before the listener + * locked is obtained, other packets cause the same connection to + * be created. */ int tcp_child_process(struct sock *parent, struct sock *child, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d04632673a9e..9383b51f3efc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp) + struct tcp_extend_values *xvp, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - + if (foc != NULL) { + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + need = (need + 3) & ~3U; /* Align to 32 bits */ + if (remaining >= need) { + opts->options |= OPTION_FAST_OPEN_COOKIE; + opts->fastopen_cookie = foc; + remaining -= need; + } + } /* Similar rationale to tcp_syn_options() applies here, too. * If the options fit, the same options should fit now! */ @@ -2658,7 +2667,8 @@ int tcp_send_synack(struct sock *sk) */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp, + struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; struct tcp_extend_values *xvp = tcp_xv(rvp); @@ -2718,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp) + skb, &opts, &md5, xvp, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2772,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } th->seq = htonl(TCP_SKB_CB(skb)->seq); - th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); + /* XXX data is queued and acked as is. No buffer/window check */ + th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b774a03bd1dc..fc04711e80c8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -304,6 +304,35 @@ static void tcp_probe_timer(struct sock *sk) } } +/* + * Timer for Fast Open socket to retransmit SYNACK. Note that the + * sk here is the child socket, not the parent (listener) socket. + */ +static void tcp_fastopen_synack_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + int max_retries = icsk->icsk_syn_retries ? : + sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + struct request_sock *req; + + req = tcp_sk(sk)->fastopen_rsk; + req->rsk_ops->syn_ack_timeout(sk, req); + + if (req->retrans >= max_retries) { + tcp_write_err(sk); + return; + } + /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error + * returned from rtx_syn_ack() to make it more persistent like + * regular retransmit because if the child socket has been accepted + * it's not good to give up too easily. + */ + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + req->retrans++; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); +} + /* * The TCP retransmit timer. */ @@ -317,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk) tcp_resume_early_retransmit(sk); return; } - + if (tp->fastopen_rsk) { + BUG_ON(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); + tcp_fastopen_synack_timer(sk); + /* Before we receive ACK to our SYN-ACK don't retransmit + * anything else (e.g., data or FIN segments). + */ + return; + } if (!tp->packets_out) goto out; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb46061c813a..182ab9a85d6c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); + treq->listener = NULL; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f99b81d53cca..09078b9bc6f6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -475,7 +475,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -987,7 +987,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, inet6_iif(skb)); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, @@ -1179,6 +1179,7 @@ have_isn: want_cookie) goto drop_and_free; + tcp_rsk(req)->listener = NULL; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; -- cgit v1.2.1 From 168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:13 +0000 Subject: tcp: TCP Fast Open Server - main code path This patch adds the main processing path to complete the TFO server patches. A TFO request (i.e., SYN+data packet with a TFO cookie option) first gets processed in tcp_v4_conn_request(). If it passes the various TFO checks by tcp_fastopen_check(), a child socket will be created right away to be accepted by applications, rather than waiting for the 3WHS to finish. In additon to the use of TFO cookie, a simple max_qlen based scheme is put in place to fend off spoofed TFO attack. When a valid ACK comes back to tcp_rcv_state_process(), it will cause the state of the child socket to switch from either TCP_SYN_RECV to TCP_ESTABLISHED, or TCP_FIN_WAIT1 to TCP_FIN_WAIT2. At this time retransmission will resume for any unack'ed (data, FIN,...) segments. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 71 +++++++++++--- net/ipv4/tcp_ipv4.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 309 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d47d5fe8f3f0..8c304a400798 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3127,6 +3127,12 @@ void tcp_rearm_rto(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + /* If the retrans timer is currently being used by Fast Open + * for SYN-ACK retrans purpose, stay put. + */ + if (tp->fastopen_rsk) + return; + if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { @@ -5895,7 +5901,9 @@ discard: tcp_send_synack(sk); #if 0 /* Note, we could accept data and URG from this segment. - * There are no obstacles to make this. + * There are no obstacles to make this (except that we must + * either change tcp_recvmsg() to prevent it from returning data + * before 3WHS completes per RFC793, or employ TCP Fast Open). * * However, if we ignore data in ACKless segments sometimes, * we have no reasons to accept it sometimes. @@ -5935,6 +5943,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock *req; int queued = 0; tp->rx_opt.saw_tstamp = 0; @@ -5990,7 +5999,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (!tcp_validate_incoming(sk, skb, th, 0)) + req = tp->fastopen_rsk; + if (req != NULL) { + BUG_ON(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); + + if (tcp_check_req(sk, skb, req, NULL, true) == NULL) + goto discard; + } else if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; /* step 5: check the ACK field */ @@ -6000,7 +6016,22 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (sk->sk_state) { case TCP_SYN_RECV: if (acceptable) { - tp->copied_seq = tp->rcv_nxt; + /* Once we leave TCP_SYN_RECV, we no longer + * need req so release it. + */ + if (req) { + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for + * correct metrics. + */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -6022,23 +6053,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - - tcp_init_metrics(sk); - - tcp_init_congestion_control(sk); + if (req) { + /* Re-arm the timer because data may + * have been sent out. This is similar + * to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more + * aggressive and retranmitting any data + * sooner based on when they were sent + * out. + */ + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); /* Prevent spurious tcp_cwnd_restart() on * first data packet. */ tp->lsndtime = tcp_time_stamp; - tcp_mtup_init(sk); tcp_initialize_rcv_mss(sk); - tcp_init_buffer_space(sk); tcp_fast_path_on(tp); } else { return 1; @@ -6046,6 +6081,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, break; case TCP_FIN_WAIT1: + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (acceptable && req != NULL) { + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } if (tp->snd_una == tp->write_seq) { struct dst_entry *dst; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bb148dee1edd..e64abed249cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; struct sk_buff *skb; + struct request_sock *req; __u32 seq; __u32 remaining; int err; @@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); + req = tp->fastopen_rsk; seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, tp->snd_una, tp->snd_nxt) && + (req == NULL || seq != tcp_rsk(req)->snt_isn)) { + /* For a Fast Open socket, allow seq to be snt_isn. */ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) !icsk->icsk_backoff) break; + /* XXX (TFO) - revisit the following logic for TFO */ + if (sock_owned_by_user(sk)) break; @@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; } + /* XXX (TFO) - if it's a TFO socket and has been accepted, rather + * than following the TCP_SYN_RECV case and closing the socket, + * we ignore the ICMP error and keep trying like a fully established + * socket. Is this the right thing to do? + */ + if (req && req->sk == NULL) + goto out; + switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: @@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed. + It can f.e. if SYNs crossed, + or Fast Open. */ if (!sock_owned_by_user(sk)) { sk->sk_err = err; @@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, - tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -1272,6 +1291,178 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif +static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct tcp_fastopen_cookie *valid_foc) +{ + bool skip_cookie = false; + struct fastopen_queue *fastopenq; + + if (likely(!fastopen_cookie_present(foc))) { + /* See include/net/tcp.h for the meaning of these knobs */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || + ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) + skip_cookie = true; /* no cookie to validate */ + else + return false; + } + fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; + /* A FO option is present; bump the counter. */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying + * to validating the cookie in order to avoid burning CPU cycles + * unnecessarily. + * + * XXX (TFO) - The implication of checking the max_qlen before + * processing a cookie request is that clients can't differentiate + * between qlen overflow causing Fast Open to be disabled + * temporarily vs a server not supporting Fast Open at all. + */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || + fastopenq == NULL || fastopenq->max_qlen == 0) + return false; + + if (fastopenq->qlen >= fastopenq->max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; + if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + spin_unlock(&fastopenq->lock); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); + /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ + foc->len = -1; + return false; + } + fastopenq->rskq_rst_head = req1->dl_next; + fastopenq->qlen--; + spin_unlock(&fastopenq->lock); + reqsk_free(req1); + } + if (skip_cookie) { + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { + if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || + memcmp(&foc->val[0], &valid_foc->val[0], + TCP_FASTOPEN_COOKIE_SIZE) != 0) + return false; + valid_foc->len = -1; + } + /* Acknowledge the data received from the peer. */ + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } else if (foc->len == 0) { /* Client requesting a cookie */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + } else { + /* Client sent a cookie with wrong size. Treat it + * the same as invalid and return a valid one. + */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + } + return false; +} + +static int tcp_v4_conn_req_fastopen(struct sock *sk, + struct sk_buff *skb, + struct sk_buff *skb_synack, + struct request_sock *req, + struct request_values *rvp) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + const struct inet_request_sock *ireq = inet_rsk(req); + struct sock *child; + + req->retrans = 0; + req->sk = NULL; + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + kfree_skb(skb_synack); + return -1; + } + ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + /* XXX (TFO) - is it ok to ignore error and continue? */ + + spin_lock(&queue->fastopenq->lock); + queue->fastopenq->qlen++; + spin_unlock(&queue->fastopenq->lock); + + /* Initialize the child socket. Have to fix some values to take + * into account the child is a Fast Open socket and is created + * only out of the bits carried in the SYN packet. + */ + tp = tcp_sk(child); + + tp->fastopen_rsk = req; + /* Do a hold on the listner sk so that if the listener is being + * closed, the child that has been accepted can live on and still + * access listen_lock. + */ + sock_hold(sk); + tcp_rsk(req)->listener = sk; + + /* RFC1323: The window in SYN & SYN/ACK segments is never + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + + /* Activate the retrans timer so that SYNACK can be retransmitted. + * The request socket is not added to the SYN table of the parent + * because it's been added to the accept queue directly. + */ + inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); + + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, child); + + /* Now finish processing the fastopen child socket. */ + inet_csk(child)->icsk_af_ops->rebuild_header(child); + tcp_init_congestion_control(child); + tcp_mtup_init(child); + tcp_init_buffer_space(child); + tcp_init_metrics(child); + + /* Queue the data carried in the SYN packet. We need to first + * bump skb's refcnt because the caller will attempt to free it. + * + * XXX (TFO) - we honor a zero-payload TFO request for now. + * (Any reason not to?) + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { + /* Don't queue the skb if there is no payload in SYN. + * XXX (TFO) - How about SYN+FIN? + */ + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } else { + skb = skb_get(skb); + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + skb_set_owner_r(skb, child); + __skb_queue_tail(&child->sk_receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } + sk->sk_data_ready(sk, 0); + bh_unlock_sock(child); + sock_put(child); + WARN_ON(req->sk == NULL); + return 0; +} + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; @@ -1285,6 +1476,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; bool want_cookie = false; + struct flowi4 fl4; + struct tcp_fastopen_cookie foc = { .len = -1 }; + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + struct sk_buff *skb_synack; + int do_fastopen; /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1319,7 +1515,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, + want_cookie ? NULL : &foc); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1377,8 +1574,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = cookie_v4_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { - struct flowi4 fl4; - /* VJ's idea. We save last timestamp seen * from the destination in peer table, when entering * state TIME-WAIT, and check against it before @@ -1419,14 +1614,52 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->snt_synack = tcp_time_stamp; - if (tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext, - skb_get_queue_mapping(skb), - want_cookie) || - want_cookie) + if (dst == NULL) { + dst = inet_csk_route_req(sk, &fl4, req); + if (dst == NULL) + goto drop_and_free; + } + do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); + + /* We don't call tcp_v4_send_synack() directly because we need + * to make sure a child socket can be created successfully before + * sending back synack! + * + * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() + * (or better yet, call tcp_send_synack() in the child context + * directly, but will have to fix bunch of other code first) + * after syn_recv_sock() except one will need to first fix the + * latter to remove its dependency on the current implementation + * of tcp_v4_send_synack()->tcp_select_initial_window(). + */ + skb_synack = tcp_make_synack(sk, dst, req, + (struct request_values *)&tmp_ext, + fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); + + if (skb_synack) { + __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); + skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); + } else + goto drop_and_free; + + if (likely(!do_fastopen)) { + int err; + err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + /* Add the request_sock to the SYN table */ + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + if (fastopen_cookie_present(&foc) && foc.len != 0) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, + (struct request_values *)&tmp_ext)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; drop_and_release: @@ -1977,6 +2210,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_cookie_values_release); tp->cookie_values = NULL; } + BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); @@ -2425,6 +2659,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_sock *inet = inet_sk(sk); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2469,7 +2704,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh, + sk->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh), len); } -- cgit v1.2.1 From 4907cb7b193a4f91c1fd30cf679c035e3644c64d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sat, 1 Sep 2012 10:31:09 -0700 Subject: treewide: fix comment/printk/variable typos Signed-off-by: Anatol Pomozov Signed-off-by: Jiri Kosina --- net/8021q/vlanproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index c718fd3664b6..4de77ea5fa37 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -105,7 +105,7 @@ static const struct file_operations vlandev_fops = { }; /* - * Proc filesystem derectory entries. + * Proc filesystem directory entries. */ /* Strings */ -- cgit v1.2.1 From a437d2744b0a4e5f405f6b9be284eb9e0a2d7741 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 1 Sep 2012 05:57:06 +0000 Subject: 6lowpan: Make a copy of skb's delivered to 6lowpan Since lowpan_process_data() modifies the skb (by calling skb_pull()), we need our own copy so that it doesn't affect the data received by other protcols (in this case, af_ieee802154). Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 6a095225148e..b28ec790c7c8 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1133,6 +1133,8 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + struct sk_buff *local_skb; + if (!netif_running(dev)) goto drop; @@ -1144,7 +1146,12 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - lowpan_process_data(skb); + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + lowpan_process_data(local_skb); + + kfree_skb(skb); break; default: break; -- cgit v1.2.1 From a2dc375e12334b3d8f787a48b2fb6172ccfb80ae Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 1 Sep 2012 05:57:07 +0000 Subject: 6lowpan: handle NETDEV_UNREGISTER event Before, it was impossible to remove a wpan device which had lowpan attached to it. Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index b28ec790c7c8..d5291113584f 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1063,12 +1063,6 @@ out: return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); } -static void lowpan_dev_free(struct net_device *dev) -{ - dev_put(lowpan_dev_info(dev)->real_dev); - free_netdev(dev); -} - static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; @@ -1118,7 +1112,7 @@ static void lowpan_setup(struct net_device *dev) dev->netdev_ops = &lowpan_netdev_ops; dev->header_ops = &lowpan_header_ops; dev->ml_priv = &lowpan_mlme; - dev->destructor = lowpan_dev_free; + dev->destructor = free_netdev; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1244,6 +1238,34 @@ static inline void __init lowpan_netlink_fini(void) rtnl_link_unregister(&lowpan_link_ops); } +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + }; + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + static struct packet_type lowpan_packet_type = { .type = __constant_htons(ETH_P_IEEE802154), .func = lowpan_rcv, @@ -1258,6 +1280,12 @@ static int __init lowpan_init_module(void) goto out; dev_add_pack(&lowpan_packet_type); + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) { + dev_remove_pack(&lowpan_packet_type); + lowpan_netlink_fini(); + } out: return err; } @@ -1270,6 +1298,8 @@ static void __exit lowpan_cleanup_module(void) dev_remove_pack(&lowpan_packet_type); + unregister_netdevice_notifier(&lowpan_dev_notifier); + /* Now 6lowpan packet_type is removed, so no new fragments are * expected on RX, therefore that's the time to clean incomplete * fragments. -- cgit v1.2.1 From 39855b5ba9a72a80de96009011b7f8b2fb01612b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 31 Aug 2012 15:28:28 -0700 Subject: openvswitch: Fix typo Signed-off-by: Joe Stringer Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f3f96badf5aa..954405ceae9e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -45,7 +45,7 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } -/* remove VLAN header from packet and update csum accrodingly. */ +/* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { struct vlan_hdr *vhdr; -- cgit v1.2.1 From 6703aa74ad1ba2e598bb49a37f7518da1195bacb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 15:58:29 +0000 Subject: netfilter: xt_socket: fix compilation warnings with gcc 4.7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes compilation warnings in xt_socket with gcc-4.7. In file included from net/netfilter/xt_socket.c:22:0: net/netfilter/xt_socket.c: In function ‘socket_mt6_v1’: include/net/netfilter/nf_tproxy_core.h:175:23: warning: ‘sport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:265:16: note: ‘sport’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:23: warning: ‘dport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:265:9: note: ‘dport’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:6: warning: ‘saddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:264:27: note: ‘saddr’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:6: warning: ‘daddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:264:19: note: ‘daddr’ was declared here In file included from net/netfilter/xt_socket.c:22:0: net/netfilter/xt_socket.c: In function ‘socket_match.isra.4’: include/net/netfilter/nf_tproxy_core.h:75:2: warning: ‘protocol’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:113:5: note: ‘protocol’ was declared here In file included from include/net/tcp.h:37:0, from net/netfilter/xt_socket.c:17: include/net/inet_hashtables.h:356:45: warning: ‘sport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:112:16: note: ‘sport’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:106:23: warning: ‘dport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:112:9: note: ‘dport’ was declared here In file included from include/net/tcp.h:37:0, from net/netfilter/xt_socket.c:17: include/net/inet_hashtables.h:356:15: warning: ‘saddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:111:16: note: ‘saddr’ was declared here In file included from include/net/tcp.h:37:0, from net/netfilter/xt_socket.c:17: include/net/inet_hashtables.h:356:15: warning: ‘daddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:111:9: note: ‘daddr’ was declared here In file included from net/netfilter/xt_socket.c:22:0: net/netfilter/xt_socket.c: In function ‘socket_mt6_v1’: include/net/netfilter/nf_tproxy_core.h:175:23: warning: ‘sport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:268:16: note: ‘sport’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:23: warning: ‘dport’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:268:9: note: ‘dport’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:6: warning: ‘saddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:267:27: note: ‘saddr’ was declared here In file included from net/netfilter/xt_socket.c:22:0: include/net/netfilter/nf_tproxy_core.h:175:6: warning: ‘daddr’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/netfilter/xt_socket.c:267:19: note: ‘daddr’ was declared here Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9ea482d08cf7..63b2bdb59e95 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -108,9 +108,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk; - __be32 daddr, saddr; - __be16 dport, sport; - u8 protocol; + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + u8 uninitialized_var(protocol); #ifdef XT_SOCKET_HAVE_CONNTRACK struct nf_conn const *ct; enum ip_conntrack_info ctinfo; @@ -261,9 +261,9 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk; - struct in6_addr *daddr, *saddr; - __be16 dport, sport; - int thoff = 0, tproto; + struct in6_addr *daddr = NULL, *saddr = NULL; + __be16 uninitialized_var(dport), uninitialized_var(sport); + int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); -- cgit v1.2.1 From 236df005614bea6a2f9afa9867e3bdfc206c6291 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Aug 2012 00:53:14 +0000 Subject: netfilter: xt_CT: refactorize xt_ct_tg_check This patch adds xt_ct_set_helper and xt_ct_set_timeout to reduce the size of xt_ct_tg_check. This aims to improve code mantainability by splitting xt_ct_tg_check in smaller chunks. Suggested by Eric Dumazet. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 262 ++++++++++++++++++++++++-------------------------- 1 file changed, 126 insertions(+), 136 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 116018560c60..16c712563860 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -72,14 +72,44 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) return 0; } +static int +xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, + const struct xt_tgchk_param *par) +{ + struct nf_conntrack_helper *helper; + struct nf_conn_help *help; + u8 proto; + + proto = xt_ct_find_proto(par); + if (!proto) { + pr_info("You must specify a L4 protocol, and not use " + "inversions on it.\n"); + return -ENOENT; + } + + helper = nf_conntrack_helper_try_module_get(helper_name, par->family, + proto); + if (helper == NULL) { + pr_info("No such helper \"%s\"\n", helper_name); + return -ENOENT; + } + + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) { + module_put(helper->me); + return -ENOMEM; + } + + help->helper = helper; + return 0; +} + static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; struct nf_conntrack_tuple t; - struct nf_conn_help *help; struct nf_conn *ct; - int ret = 0; - u8 proto; + int ret; if (info->flags & ~XT_CT_NOTRACK) return -EINVAL; @@ -112,31 +142,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { - struct nf_conntrack_helper *helper; - - ret = -ENOENT; - proto = xt_ct_find_proto(par); - if (!proto) { - pr_info("You must specify a L4 protocol, " - "and not use inversions on it.\n"); - goto err3; - } - - ret = -ENOENT; - helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (helper == NULL) { - pr_info("No such helper \"%s\"\n", info->helper); - goto err3; - } - - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); - if (help == NULL) + ret = xt_ct_set_helper(ct, info->helper, par); + if (ret < 0) goto err3; - - help->helper = helper; } __set_bit(IPS_TEMPLATE_BIT, &ct->status); @@ -164,17 +172,77 @@ static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) } #endif +static int +xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, + const char *timeout_name) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + struct ctnl_timeout *timeout; + struct nf_conn_timeout *timeout_ext; + const struct ipt_entry *e = par->entryinfo; + struct nf_conntrack_l4proto *l4proto; + int ret = 0; + + rcu_read_lock(); + timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); + if (timeout_find_get == NULL) { + ret = -ENOENT; + pr_info("Timeout policy base is empty\n"); + goto out; + } + + if (e->ip.invflags & IPT_INV_PROTO) { + ret = -EINVAL; + pr_info("You cannot use inversion on L4 protocol\n"); + goto out; + } + + timeout = timeout_find_get(timeout_name); + if (timeout == NULL) { + ret = -ENOENT; + pr_info("No such timeout policy \"%s\"\n", timeout_name); + goto out; + } + + if (timeout->l3num != par->family) { + ret = -EINVAL; + pr_info("Timeout policy `%s' can only be used by L3 protocol " + "number %d\n", timeout_name, timeout->l3num); + goto err_put_timeout; + } + /* Make sure the timeout policy matches any existing protocol tracker, + * otherwise default to generic. + */ + l4proto = __nf_ct_l4proto_find(par->family, e->ip.proto); + if (timeout->l4proto->l4proto != l4proto->l4proto) { + ret = -EINVAL; + pr_info("Timeout policy `%s' can only be used by L4 protocol " + "number %d\n", + timeout_name, timeout->l4proto->l4proto); + goto err_put_timeout; + } + timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); + if (timeout_ext == NULL) + ret = -ENOMEM; + +err_put_timeout: + __xt_ct_tg_timeout_put(timeout); +out: + rcu_read_unlock(); + return ret; +#else + return -EOPNOTSUPP; +#endif +} + static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conntrack_tuple t; - struct nf_conn_help *help; struct nf_conn *ct; - int ret = 0; - u8 proto; -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - struct ctnl_timeout *timeout; -#endif + int ret; + if (info->flags & ~XT_CT_NOTRACK) return -EINVAL; @@ -206,93 +274,16 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { - struct nf_conntrack_helper *helper; - - ret = -ENOENT; - proto = xt_ct_find_proto(par); - if (!proto) { - pr_info("You must specify a L4 protocol, " - "and not use inversions on it.\n"); - goto err3; - } - - ret = -ENOENT; - helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (helper == NULL) { - pr_info("No such helper \"%s\"\n", info->helper); - goto err3; - } - - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); - if (help == NULL) + ret = xt_ct_set_helper(ct, info->helper, par); + if (ret < 0) goto err3; - - help->helper = helper; } -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT if (info->timeout[0]) { - typeof(nf_ct_timeout_find_get_hook) timeout_find_get; - struct nf_conn_timeout *timeout_ext; - - rcu_read_lock(); - timeout_find_get = - rcu_dereference(nf_ct_timeout_find_get_hook); - - if (timeout_find_get) { - const struct ipt_entry *e = par->entryinfo; - struct nf_conntrack_l4proto *l4proto; - - if (e->ip.invflags & IPT_INV_PROTO) { - ret = -EINVAL; - pr_info("You cannot use inversion on " - "L4 protocol\n"); - goto err4; - } - timeout = timeout_find_get(info->timeout); - if (timeout == NULL) { - ret = -ENOENT; - pr_info("No such timeout policy \"%s\"\n", - info->timeout); - goto err4; - } - if (timeout->l3num != par->family) { - ret = -EINVAL; - pr_info("Timeout policy `%s' can only be " - "used by L3 protocol number %d\n", - info->timeout, timeout->l3num); - goto err5; - } - /* Make sure the timeout policy matches any existing - * protocol tracker, otherwise default to generic. - */ - l4proto = __nf_ct_l4proto_find(par->family, - e->ip.proto); - if (timeout->l4proto->l4proto != l4proto->l4proto) { - ret = -EINVAL; - pr_info("Timeout policy `%s' can only be " - "used by L4 protocol number %d\n", - info->timeout, - timeout->l4proto->l4proto); - goto err5; - } - timeout_ext = nf_ct_timeout_ext_add(ct, timeout, - GFP_ATOMIC); - if (timeout_ext == NULL) { - ret = -ENOMEM; - goto err5; - } - } else { - ret = -ENOENT; - pr_info("Timeout policy base is empty\n"); - goto err4; - } - rcu_read_unlock(); + ret = xt_ct_set_timeout(ct, par, info->timeout); + if (ret < 0) + goto err3; } -#endif __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); @@ -300,12 +291,6 @@ out: info->ct = ct; return 0; -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT -err5: - __xt_ct_tg_timeout_put(timeout); -err4: - rcu_read_unlock(); -#endif err3: nf_conntrack_free(ct); err2: @@ -330,15 +315,30 @@ static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) nf_ct_put(info->ct); } -static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +static void xt_ct_destroy_timeout(struct nf_conn *ct) { - struct xt_ct_target_info_v1 *info = par->targinfo; - struct nf_conn *ct = info->ct; - struct nf_conn_help *help; #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; typeof(nf_ct_timeout_put_hook) timeout_put; + + rcu_read_lock(); + timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + + if (timeout_put) { + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeout_put(timeout_ext->timeout); + } + rcu_read_unlock(); #endif +} + +static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + if (!nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) @@ -346,17 +346,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_l3proto_module_put(par->family); -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - rcu_read_lock(); - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); - - if (timeout_put) { - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeout_put(timeout_ext->timeout); - } - rcu_read_unlock(); -#endif + xt_ct_destroy_timeout(ct); } nf_ct_put(info->ct); } -- cgit v1.2.1 From 84b5ee939eba0115739c19c0e01ea903b029c9da Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Aug 2012 00:53:15 +0000 Subject: netfilter: nf_conntrack: add nf_ct_timeout_lookup This patch adds the new nf_ct_timeout_lookup function to encapsulate the timeout policy attachment that is called in the nf_conntrack_in path. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f83e79defed9..c9bb994ae9ba 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -930,7 +930,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - struct nf_conn_timeout *timeout_ext; unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; @@ -997,11 +996,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); /* Decide what timeout policy we want to apply to this flow. */ - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); + timeouts = nf_ct_timeout_lookup(net, ct, l4proto); ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { -- cgit v1.2.1 From 965505015beccc4ec900798070165875b8e8dccf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 Aug 2012 20:23:39 +0000 Subject: netfilter: remove xt_NOTRACK It was scheduled to be removed for a long time. Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: "David S. Miller" Cc: netfilter@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 13 ------------ net/netfilter/Makefile | 1 - net/netfilter/xt_NOTRACK.c | 53 ---------------------------------------------- 3 files changed, 67 deletions(-) delete mode 100644 net/netfilter/xt_NOTRACK.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 052836e2490e..3f4b3b4a7762 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -670,19 +670,6 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_NOTRACK - tristate '"NOTRACK" target support' - depends on IP_NF_RAW || IP6_NF_RAW - depends on NF_CONNTRACK - help - The NOTRACK target allows a select rule to specify - which packets *not* to enter the conntrack/NAT - subsystem with all the consequences (no ICMP error tracking, - no protocol helpers for the selected packets). - - If you want to compile it as a module, say M here and read - . If unsure, say `N'. - config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 403ea8125884..98244d4c75f2 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -85,7 +85,6 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o -obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c deleted file mode 100644 index 9d782181b6c8..000000000000 --- a/net/netfilter/xt_NOTRACK.c +++ /dev/null @@ -1,53 +0,0 @@ -/* This is a module which is used for setting up fake conntracks - * on packets so that they are not seen by the conntrack/NAT code. - */ -#include -#include - -#include -#include - -MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_NOTRACK"); -MODULE_ALIAS("ip6t_NOTRACK"); - -static unsigned int -notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) - return XT_CONTINUE; - - /* Attach fake conntrack entry. - If there is a real ct entry correspondig to this packet, - it'll hang aroun till timing out. We don't deal with it - for performance reasons. JK */ - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); - - return XT_CONTINUE; -} - -static struct xt_target notrack_tg_reg __read_mostly = { - .name = "NOTRACK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, -}; - -static int __init notrack_tg_init(void) -{ - return xt_register_target(¬rack_tg_reg); -} - -static void __exit notrack_tg_exit(void) -{ - xt_unregister_target(¬rack_tg_reg); -} - -module_init(notrack_tg_init); -module_exit(notrack_tg_exit); -- cgit v1.2.1 From 2a6decfd8a5fae0422c98a22aa6bc30224b8a3ec Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 22 Aug 2012 19:59:57 +0000 Subject: netfilter: pass 'nf_hook_ops' instead of 'list_head' to nf_iterate() Since 'list_for_each_continue_rcu' has already been replaced by 'list_for_each_entry_continue_rcu', pass 'list_head' to nf_iterate() as a parameter can not benefit us any more. This patch will replace 'list_head' with 'nf_hook_ops' as the parameter of nf_iterate() to save code. Signed-off-by: Michael Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 24 ++++++++++-------------- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 6 +++--- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e61b3ac9591b..0b119d94c31a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -126,42 +126,38 @@ unsigned int nf_iterate(struct list_head *head, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, - struct list_head **i, + struct nf_hook_ops **elemp, int (*okfn)(struct sk_buff *), int hook_thresh) { unsigned int verdict; - struct nf_hook_ops *elem = list_entry_rcu(*i, struct nf_hook_ops, list); /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_entry_continue_rcu(elem, head, list) { - if (hook_thresh > elem->priority) + list_for_each_entry_continue_rcu((*elemp), head, list) { + if (hook_thresh > (*elemp)->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = elem->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - elem->hook, hook); + (*elemp)->hook, hook); continue; } #endif - if (verdict != NF_REPEAT) { - *i = &elem->list; + if (verdict != NF_REPEAT) return verdict; - } goto repeat; } } - *i = &elem->list; return NF_ACCEPT; } @@ -174,14 +170,14 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, int (*okfn)(struct sk_buff *), int hook_thresh) { - struct list_head *elem; + struct nf_hook_ops *elem; unsigned int verdict; int ret = 0; /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = &nf_hooks[pf][hook]; + elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list); next_hook: verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); @@ -193,8 +189,8 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_QBITS); + int err = nf_queue(skb, &elem->list, pf, hook, indev, outdev, + okfn, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 770f76432ad0..2886231d20a8 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,7 +18,7 @@ extern unsigned int nf_iterate(struct list_head *head, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, - struct list_head **i, + struct nf_hook_ops **elemp, int (*okfn)(struct sk_buff *), int hook_thresh); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ce60cf0f6c11..29fe102d3436 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -287,7 +287,7 @@ int nf_queue(struct sk_buff *skb, void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; - struct list_head *elem = &entry->elem->list; + struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; int err; @@ -297,7 +297,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { - elem = elem->prev; + elem = list_entry(elem->list.prev, struct nf_hook_ops, list); verdict = NF_ACCEPT; } @@ -323,7 +323,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = __nf_queue(skb, elem, entry->pf, entry->hook, + err = __nf_queue(skb, &elem->list, entry->pf, entry->hook, entry->indev, entry->outdev, entry->okfn, verdict >> NF_VERDICT_QBITS); if (err < 0) { -- cgit v1.2.1 From 1c15b677097fc133cc23108d98e0f0846e94cd48 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 22 Aug 2012 20:00:06 +0000 Subject: netfilter: pass 'nf_hook_ops' instead of 'list_head' to nf_queue() Since 'list_for_each_continue_rcu' has already been replaced by 'list_for_each_entry_continue_rcu', pass 'list_head' to nf_queue() as a parameter can not benefit us any more. This patch will replace 'list_head' with 'nf_hook_ops' as the parameter of nf_queue() and __nf_queue() to save code. Signed-off-by: Michael Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 4 ++-- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0b119d94c31a..68912dadf13d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -189,8 +189,8 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, &elem->list, pf, hook, indev, outdev, - okfn, verdict >> NF_VERDICT_QBITS); + int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 2886231d20a8..3deec997be89 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -24,7 +24,7 @@ extern unsigned int nf_iterate(struct list_head *head, /* nf_queue.c */ extern int nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 29fe102d3436..8d2cf9ec37a8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -118,7 +118,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) * through nf_reinject(). */ static int __nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, @@ -155,7 +155,7 @@ static int __nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = list_entry(elem, struct nf_hook_ops, list), + .elem = elem, .pf = pf, .hook = hook, .indev = indev, @@ -225,7 +225,7 @@ static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) #endif int nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, @@ -323,7 +323,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = __nf_queue(skb, &elem->list, entry->pf, entry->hook, + err = __nf_queue(skb, elem, entry->pf, entry->hook, entry->indev, entry->outdev, entry->okfn, verdict >> NF_VERDICT_QBITS); if (err < 0) { -- cgit v1.2.1 From ce9f3f31efb88841e4df98794b13dbac8c4901da Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Jun 2012 00:18:31 +0000 Subject: netfilter: properly annotate ipv4_netfilter_{init,fini}() Despite being just a few bytes of code, they should still have proper annotations. Signed-off-by: Jan Beulich Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f1643c0c3587..4c0cf63dd92e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -188,12 +188,12 @@ static const struct nf_afinfo nf_ip_afinfo = { .route_key_size = sizeof(struct ip_rt_info), }; -static int ipv4_netfilter_init(void) +static int __init ipv4_netfilter_init(void) { return nf_register_afinfo(&nf_ip_afinfo); } -static void ipv4_netfilter_fini(void) +static void __exit ipv4_netfilter_fini(void) { nf_unregister_afinfo(&nf_ip_afinfo); } -- cgit v1.2.1 From e812347ccf9e8ce073b0ba0c49d03b124707b2b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Sep 2012 23:57:18 +0000 Subject: net: sock_edemux() should take care of timewait sockets sock_edemux() can handle either a regular socket or a timewait socket Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced8d6a8..7f64467535d1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1523,7 +1523,12 @@ EXPORT_SYMBOL(sock_rfree); void sock_edemux(struct sk_buff *skb) { - sock_put(skb->sk); + struct sock *sk = skb->sk; + + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); } EXPORT_SYMBOL(sock_edemux); -- cgit v1.2.1 From 4c3a5bdae293f75cdf729c6c00124e8489af2276 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 3 Sep 2012 04:27:42 +0000 Subject: sctp: Don't charge for data in sndbuf again when transmitting packet SCTP charges wmem_alloc via sctp_set_owner_w() in sctp_sendmsg() and via skb_set_owner_w() in sctp_packet_transmit(). If a sender runs out of sndbuf it will sleep in sctp_wait_for_sndbuf() and expects to be waken up by __sctp_write_space(). Buffer space charged via sctp_set_owner_w() is released in sctp_wfree() which calls __sctp_write_space() directly. Buffer space charged via skb_set_owner_w() is released via sock_wfree() which calls sk->sk_write_space() _if_ SOCK_USE_WRITE_QUEUE is not set. sctp_endpoint_init() sets SOCK_USE_WRITE_QUEUE on all sockets. Therefore if sctp_packet_transmit() manages to queue up more than sndbuf bytes, sctp_wait_for_sndbuf() will never be woken up again unless it is interrupted by a signal. This could be fixed by clearing the SOCK_USE_WRITE_QUEUE flag but ... Charging for the data twice does not make sense in the first place, it leads to overcharging sndbuf by a factor 2. Therefore this patch only charges a single byte in wmem_alloc when transmitting an SCTP packet to ensure that the socket stays alive until the packet has been released. This means that control chunks are no longer accounted for in wmem_alloc which I believe is not a problem as skb->truesize will typically lead to overcharging anyway and thus compensates for any control overhead. Signed-off-by: Thomas Graf CC: Vlad Yasevich CC: Neil Horman CC: David Miller Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..be50aa234dcd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -364,6 +364,25 @@ finish: return retval; } +static void sctp_packet_release_owner(struct sk_buff *skb) +{ + sk_free(skb->sk); +} + +static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sctp_packet_release_owner; + + /* + * The data chunks have already been accounted for in sctp_sendmsg(), + * therefore only reserve a single byte to keep socket around until + * the packet has been transmitted. + */ + atomic_inc(&sk->sk_wmem_alloc); +} + /* All packets are sent to the network through this function from * sctp_outq_tail(). * @@ -405,7 +424,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Set the owning socket so that we know where to get the * destination IP address. */ - skb_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(nskb, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); -- cgit v1.2.1 From 09484d1f6edb3b798c5926a6755f1badb77ece9f Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 2 Sep 2012 17:38:02 +0000 Subject: tcp: move tcp_enter_cwr() To prepare replacing rate halving with PRR algorithm in CWR state. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8c304a400798..3ab0c7573a0b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -743,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -/* Set slow start threshold and cwnd not falling to slow start */ -void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) -{ - struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - - tp->prior_ssthresh = 0; - tp->bytes_acked = 0; - if (icsk->icsk_ca_state < TCP_CA_CWR) { - tp->undo_marker = 0; - if (set_ssthresh) - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); - - tcp_set_ca_state(sk, TCP_CA_CWR); - } -} - /* * Packet counting of FACK is based on in-order assumptions, therefore TCP * disables it when reordering is detected @@ -2741,6 +2718,29 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +/* Set slow start threshold and cwnd not falling to slow start */ +void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) +{ + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + + tp->prior_ssthresh = 0; + tp->bytes_acked = 0; + if (icsk->icsk_ca_state < TCP_CA_CWR) { + tp->undo_marker = 0; + if (set_ssthresh) + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp) + 1U); + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; + tp->snd_cwnd_stamp = tcp_time_stamp; + TCP_ECN_queue_cwr(tp); + + tcp_set_ca_state(sk, TCP_CA_CWR); + } +} + static void tcp_try_keep_open(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.1 From fb4d3d1df31907eadd2e2a745e840921888b346a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 2 Sep 2012 17:38:03 +0000 Subject: tcp: move tcp_update_cwnd_in_recovery To prepare replacing rate halving with PRR algorithm in CWR state. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 64 ++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ab0c7573a0b..38589e464e63 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2700,6 +2700,38 @@ static bool tcp_try_undo_loss(struct sock *sk) return false; } +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2854,38 +2886,6 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); -/* This function implements the PRR algorithm, specifcally the PRR-SSRB - * (proportional rate reduction with slow start reduction bound) as described in - * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. - * It computes the number of packets to send (sndcnt) based on packets newly - * delivered: - * 1) If the packets in flight is larger than ssthresh, PRR spreads the - * cwnd reductions across a full RTT. - * 2) If packets in flight is lower than ssthresh (such as due to excess - * losses and/or application stalls), do not perform any further cwnd - * reductions, but instead slow start up to ssthresh. - */ -static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, - int fast_rexmit, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int sndcnt = 0; - int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - - if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { - u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + - tp->prior_cwnd - 1; - sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else { - sndcnt = min_t(int, delta, - max_t(int, tp->prr_delivered - tp->prr_out, - newly_acked_sacked) + 1); - } - - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; -} - static void tcp_enter_recovery(struct sock *sk, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.1 From 684bad1107571d35610a674c61b3544efb5a5b13 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 2 Sep 2012 17:38:04 +0000 Subject: tcp: use PRR to reduce cwin in CWR state Use proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state, in addition to Recovery state. Retire the current rate-halving in CWR. When losses are detected via ACKs in CWR state, the sender enters Recovery state but the cwnd reduction continues and does not restart. Rename and refactor cwnd reduction functions since both CWR and Recovery use the same algorithm: tcp_init_cwnd_reduction() is new and initiates reduction state variables. tcp_cwnd_reduction() is previously tcp_update_cwnd_in_recovery(). tcp_ends_cwnd_reduction() is previously tcp_complete_cwr(). The rate halving functions and logic such as tcp_cwnd_down(), tcp_min_cwnd(), and the cwnd moderation inside tcp_enter_cwr() are removed. The unused parameter, flag, in tcp_cwnd_reduction() is also removed. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 119 +++++++++++++++++--------------------------------- net/ipv4/tcp_output.c | 6 +-- 2 files changed, 44 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 38589e464e63..e2bec815ff23 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } -/* Lower bound on congestion window is slow start threshold - * unless congestion avoidance choice decides to overide it. - */ -static inline u32 tcp_cwnd_min(const struct sock *sk) -{ - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - - return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh; -} - -/* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int decr = tp->snd_cwnd_cnt + 1; - - if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) || - (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) { - tp->snd_cwnd_cnt = decr & 1; - decr >>= 1; - - if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) - tp->snd_cwnd -= decr; - - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - tp->snd_cwnd_stamp = tcp_time_stamp; - } -} - /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ @@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk) return false; } -/* This function implements the PRR algorithm, specifcally the PRR-SSRB - * (proportional rate reduction with slow start reduction bound) as described in - * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. +/* The cwnd reduction in CWR and Recovery use the PRR algorithm + * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ * It computes the number of packets to send (sndcnt) based on packets newly * delivered: * 1) If the packets in flight is larger than ssthresh, PRR spreads the @@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk) * losses and/or application stalls), do not perform any further cwnd * reductions, but instead slow start up to ssthresh. */ -static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, - int fast_rexmit, int flag) +static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->high_seq = tp->snd_nxt; + tp->bytes_acked = 0; + tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; + if (set_ssthresh) + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + TCP_ECN_queue_cwr(tp); +} + +static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + tp->prior_cwnd - 1; @@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; } -static inline void tcp_complete_cwr(struct sock *sk) +static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery. */ - if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_stamp = tcp_time_stamp; - } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { - /* PRR algorithm. */ - tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; - } + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || + (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -/* Set slow start threshold and cwnd not falling to slow start */ +/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); tp->prior_ssthresh = 0; tp->bytes_acked = 0; - if (icsk->icsk_ca_state < TCP_CA_CWR) { + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; - if (set_ssthresh) - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); - + tcp_init_cwnd_reduction(sk, set_ssthresh); tcp_set_ca_state(sk, TCP_CA_CWR); } } @@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag) +static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(sk, flag); + tcp_cwnd_reduction(sk, newly_acked_sacked, 0); } } @@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; @@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); - TCP_ECN_queue_cwr(tp); + tcp_init_cwnd_reduction(sk, true); } - - tp->bytes_acked = 0; - tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; - tp->prr_delivered = 0; - tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(sk); + tcp_end_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk)) return; - tcp_complete_cwr(sk); + tcp_end_cwnd_reduction(sk); break; } } @@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag); + tcp_try_to_open(sk, flag, newly_acked_sacked); return; } @@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tp->prr_delivered += newly_acked_sacked; - tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); + tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); + !tcp_in_cwnd_reduction(sk); } /* Check that window update is acceptable. @@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) } /* A conservative spurious RTO response algorithm: reduce cwnd using - * rate halving and continue in congestion avoidance. + * PRR and continue in congestion avoidance. */ -static void tcp_ratehalving_spur_to_response(struct sock *sk) +static void tcp_cwr_spur_to_response(struct sock *sk) { tcp_enter_cwr(sk, 0); } @@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk) static void tcp_undo_spur_to_response(struct sock *sk, int flag) { if (flag & FLAG_ECE) - tcp_ratehalving_spur_to_response(sk); + tcp_cwr_spur_to_response(sk); else tcp_undo_cwr(sk, true); } @@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag) tcp_conservative_spur_to_response(tp); break; default: - tcp_ratehalving_spur_to_response(sk); + tcp_cwr_spur_to_response(sk); break; } tp->frto_counter = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9383b51f3efc..cfe6ffe1c177 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (push_one) break; } - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += sent_pkts; if (likely(sent_pkts)) { + if (tcp_in_cwnd_reduction(sk)) + tp->prr_out += sent_pkts; tcp_cwnd_validate(sk); return false; } @@ -2542,7 +2542,7 @@ begin_fwd: } NET_INC_STATS_BH(sock_net(sk), mib_idx); - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); if (skb == tcp_write_queue_head(sk)) -- cgit v1.2.1 From b379135c40163ae79ba7a54e6928b53983e74ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Sep 2012 03:19:57 +0000 Subject: fq_codel: dont reinit flow state When fq_codel builds a new flow, it should not reset codel state. Codel algo needs to get previous values (lastcount, drop_next) to get proper behavior. Signed-off-by: Dave Taht Signed-off-by: Eric Dumazet Acked-by: Dave Taht Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9fc1c62ec80e..4e606fcb2534 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -191,7 +191,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); - codel_vars_init(&flow->cvars); q->new_flow_count++; flow->deficit = q->quantum; flow->dropped = 0; @@ -418,6 +417,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) struct fq_codel_flow *flow = q->flows + i; INIT_LIST_HEAD(&flow->flowchain); + codel_vars_init(&flow->cvars); } } if (sch->limit >= 1) -- cgit v1.2.1 From c303aa94cdae483a7577230e61720e126e600a52 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Sep 2012 19:06:27 -0700 Subject: openvswitch: Fix FLOW_BUFSIZE definition. The vlan encapsulation fields in the maximum flow defintion were never updated when the representation changed before upstreaming. In theory this could cause a kernel panic when a maximum length flow is used. In practice this has never happened (to my knowledge) because skb allocations are padded out to a cache line so you would need the right combination of flow and packet being sent to userspace. Signed-off-by: Jesse Gross --- net/openvswitch/flow.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9b75617ca4e0..c30df1a10c67 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -145,15 +145,17 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 + * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) * OVS_KEY_ATTR_IPV6 40 -- 4 44 * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 132 + * total 144 */ -#define FLOW_BUFSIZE 132 +#define FLOW_BUFSIZE 144 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, -- cgit v1.2.1 From 15eac2a74277bc7de68a7c2a64a7c91b4b6f5961 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 23 Aug 2012 12:40:54 -0700 Subject: openvswitch: Increase maximum number of datapath ports. Use hash table to store ports of datapath. Allow 64K ports per switch. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 2 +- net/openvswitch/datapath.c | 110 +++++++++++++++++++++++++++++++-------------- net/openvswitch/datapath.h | 33 +++++++++++--- net/openvswitch/flow.c | 11 ++--- net/openvswitch/flow.h | 3 +- net/openvswitch/vport.c | 1 + net/openvswitch/vport.h | 4 +- 7 files changed, 113 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f3f96badf5aa..0da687769f56 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) if (unlikely(!skb)) return -ENOMEM; - vport = rcu_dereference(dp->ports[out_port]); + vport = ovs_vport_rcu(dp, out_port); if (unlikely(!vport)) { kfree_skb(skb); return -ENODEV; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cad39fca75a9..105a0b5adc51 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -116,7 +116,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) /* Must be called with rcu_read_lock or RTNL lock. */ const char *ovs_dp_name(const struct datapath *dp) { - struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); + struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } @@ -127,7 +127,7 @@ static int get_dpifindex(struct datapath *dp) rcu_read_lock(); - local = rcu_dereference(dp->ports[OVSP_LOCAL]); + local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) ifindex = local->ops->get_ifindex(local); else @@ -145,9 +145,30 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); + kfree(dp->ports); kfree(dp); } +static struct hlist_head *vport_hash_bucket(const struct datapath *dp, + u16 port_no) +{ + return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; +} + +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) +{ + struct vport *vport; + struct hlist_node *n; + struct hlist_head *head; + + head = vport_hash_bucket(dp, port_no); + hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + if (vport->port_no == port_no) + return vport; + } + return NULL; +} + /* Called with RTNL lock and genl_lock. */ static struct vport *new_vport(const struct vport_parms *parms) { @@ -156,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms) vport = ovs_vport_add(parms); if (!IS_ERR(vport)) { struct datapath *dp = parms->dp; + struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); - rcu_assign_pointer(dp->ports[parms->port_no], vport); - list_add(&vport->node, &dp->port_list); + hlist_add_head_rcu(&vport->dp_hash_node, head); } return vport; @@ -170,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p) ASSERT_RTNL(); /* First drop references to device. */ - list_del(&p->node); - rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + hlist_del_rcu(&p->dp_hash_node); /* Then destroy it. */ ovs_vport_del(p); @@ -1248,7 +1268,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err; + int err, i; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1261,7 +1281,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (dp == NULL) goto err_unlock_rtnl; - INIT_LIST_HEAD(&dp->port_list); ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ @@ -1276,6 +1295,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) { + err = -ENOMEM; + goto err_destroy_percpu; + } + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1290,7 +1319,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err == -EBUSY) err = -EEXIST; - goto err_destroy_percpu; + goto err_destroy_ports_array; } reply = ovs_dp_cmd_build_info(dp, info->snd_pid, @@ -1309,7 +1338,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) return 0; err_destroy_local_port: - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); +err_destroy_ports_array: + kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: @@ -1326,15 +1357,21 @@ err: /* Called with genl_mutex. */ static void __dp_destroy(struct datapath *dp) { - struct vport *vport, *next_vport; + int i; rtnl_lock(); - list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) - if (vport->port_no != OVSP_LOCAL) - ovs_dp_detach_port(vport); + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + struct hlist_node *node, *n; + + hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + if (vport->port_no != OVSP_LOCAL) + ovs_dp_detach_port(vport); + } list_del(&dp->list_node); - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); /* rtnl_unlock() will wait until all the references to devices that * are pending unregistration have been dropped. We do it here to @@ -1566,7 +1603,7 @@ static struct vport *lookup_vport(struct net *net, if (!dp) return ERR_PTR(-ENODEV); - vport = rcu_dereference_rtnl(dp->ports[port_no]); + vport = ovs_vport_rtnl_rcu(dp, port_no); if (!vport) return ERR_PTR(-ENOENT); return vport; @@ -1603,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (port_no >= DP_MAX_PORTS) goto exit_unlock; - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl_rcu(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock; @@ -1613,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EFBIG; goto exit_unlock; } - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl(dp, port_no); if (!vport) break; } @@ -1755,32 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct datapath *dp; - u32 port_no; - int retval; + int bucket = cb->args[0], skip = cb->args[1]; + int i, j = 0; dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; rcu_read_lock(); - for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { + for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - - vport = rcu_dereference(dp->ports[port_no]); - if (!vport) - continue; - - if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_NEW) < 0) - break; + struct hlist_node *n; + + j = 0; + hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + if (j >= skip && + ovs_vport_cmd_fill_info(vport, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + OVS_VPORT_CMD_NEW) < 0) + goto out; + + j++; + } + skip = 0; } +out: rcu_read_unlock(); - cb->args[0] = port_no; - retval = skb->len; + cb->args[0] = i; + cb->args[1] = j; - return retval; + return skb->len; } static struct genl_ops dp_vport_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 771c11e13e34..129ec5480758 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -29,7 +29,9 @@ #include "flow.h" #include "vport.h" -#define DP_MAX_PORTS 1024 +#define DP_MAX_PORTS USHRT_MAX +#define DP_VPORT_HASH_BUCKETS 1024 + #define SAMPLE_ACTION_DEPTH 3 /** @@ -57,10 +59,8 @@ struct dp_stats_percpu { * @list_node: Element in global 'dps' list. * @n_flows: Number of flows currently in flow table. * @table: Current flow table. Protected by genl_lock and RCU. - * @ports: Map from port number to &struct vport. %OVSP_LOCAL port - * always exists, other ports may be %NULL. Protected by RTNL and RCU. - * @port_list: List of all ports in @ports in arbitrary order. RTNL required - * to iterate or modify. + * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by + * RTNL and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. * @@ -75,8 +75,7 @@ struct datapath { struct flow_table __rcu *table; /* Switch ports. */ - struct vport __rcu *ports[DP_MAX_PORTS]; - struct list_head port_list; + struct hlist_head *ports; /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; @@ -87,6 +86,26 @@ struct datapath { #endif }; +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no) +{ + ASSERT_RTNL(); + return ovs_lookup_vport(dp, port_no); +} + /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b7f38b161909..f9f211d95ebe 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - /* At least DP_MAX_PORTS actions are required to be able to flood a - * packet to every port. Factor of 2 allows for setting VLAN tags, - * etc. */ - if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4)) + if (actions_len > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); @@ -1000,7 +997,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->phy.in_port = in_port; attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); } else { - swkey->phy.in_port = USHRT_MAX; + swkey->phy.in_port = DP_MAX_PORTS; } /* Data attributes. */ @@ -1143,7 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, const struct nlattr *nla; int rem; - *in_port = USHRT_MAX; + *in_port = DP_MAX_PORTS; *priority = 0; nla_for_each_nested(nla, attr, rem) { @@ -1180,7 +1177,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; - if (swkey->phy.in_port != USHRT_MAX && + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9b75617ca4e0..d92e22a638cf 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -43,7 +43,7 @@ struct sw_flow_actions { struct sw_flow_key { struct { u32 priority; /* Packet QoS priority. */ - u16 in_port; /* Input switch port (or USHRT_MAX). */ + u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ @@ -161,6 +161,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, const struct nlattr *); +#define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9873acea9785..1abd9609ba78 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -127,6 +127,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->port_no = parms->port_no; vport->upcall_pid = parms->upcall_pid; vport->ops = ops; + INIT_HLIST_NODE(&vport->dp_hash_node); vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); if (!vport->percpu_stats) { diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 97cef08d981b..c56e4836e93b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,10 +70,10 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @node: Element in @dp's @port_list. * @upcall_pid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. + * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @stats_lock: Protects @err_stats; @@ -83,10 +83,10 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - struct list_head node; u32 upcall_pid; struct hlist_node hash_node; + struct hlist_node dp_hash_node; const struct vport_ops *ops; struct vport_percpu_stats __percpu *percpu_stats; -- cgit v1.2.1 From 0ef24e528f27683620d78c3393b3df73d804828c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 21 Aug 2012 12:43:35 +0300 Subject: mac80211: Do not check for valid hw_queues for P2P_DEVICE A P2P Device interface does not have a netdev, and is not expected to be used for transmitting data, so there is no need to assign hw queues for it. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 59f8adc2aa5f..d747da541747 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -278,13 +278,15 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) int n_queues = sdata->local->hw.queues; int i; - for (i = 0; i < IEEE80211_NUM_ACS; i++) { - if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == - IEEE80211_INVAL_HW_QUEUE)) - return -EINVAL; - if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >= - n_queues)) - return -EINVAL; + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == + IEEE80211_INVAL_HW_QUEUE)) + return -EINVAL; + if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >= + n_queues)) + return -EINVAL; + } } if ((sdata->vif.type != NL80211_IFTYPE_AP) || -- cgit v1.2.1 From 3d2abdfdf14f4d6decc2023708211e19b096f4ca Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 4 Sep 2012 17:44:45 +0300 Subject: mac80211: clear bssid on auth/assoc failure ifmgd->bssid wasn't cleared properly in some auth/assoc failure cases, causing mac80211 and the low-level driver to go out of sync. Clear ifmgd->bssid on failure, and notify the driver. Cc: stable@kernel.org # 3.4+ Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a4a5acdbaa4d..f76b83341cf9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3248,6 +3248,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, goto out_unlock; err_clear: + memset(ifmgd->bssid, 0, ETH_ALEN); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->auth_data = NULL; err_free: kfree(auth_data); @@ -3439,6 +3441,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, err = 0; goto out; err_clear: + memset(ifmgd->bssid, 0, ETH_ALEN); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); -- cgit v1.2.1 From b4e4f47e940bc93c5b1125a4429ff53956754800 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Sep 2012 21:41:04 +0800 Subject: nl80211: fix possible memory leak nl80211_connect() connkeys is malloced in nl80211_parse_connkeys() and should be freed in the error handling case, otherwise it will cause memory leak. spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 97026f3b215a..1e37dbf00cb3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5633,8 +5633,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa_mask)); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { - if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) + if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) { + kfree(connkeys); return -EINVAL; + } memcpy(&connect.ht_capa, nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), sizeof(connect.ht_capa)); -- cgit v1.2.1 From 6cf5c951175abcec4da470c50565cc0afe6cd11d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 4 Sep 2012 04:13:18 +0000 Subject: netrom: copy_datagram_iovec can fail Check for an error from this and if so bail properly. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 06592d8b4a2b..1b9024ee963c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1169,7 +1169,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_TRUNC; } - skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (er < 0) { + skb_free_datagram(sk, skb); + release_sock(sk); + return er; + } if (sax != NULL) { sax->sax25_family = AF_NETROM; -- cgit v1.2.1 From 37159ef2c1ae1e696b24b260b241209a19f92c60 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 07:18:57 +0000 Subject: l2tp: fix a lockdep splat Fixes following lockdep splat : [ 1614.734896] ============================================= [ 1614.734898] [ INFO: possible recursive locking detected ] [ 1614.734901] 3.6.0-rc3+ #782 Not tainted [ 1614.734903] --------------------------------------------- [ 1614.734905] swapper/11/0 is trying to acquire lock: [ 1614.734907] (slock-AF_INET){+.-...}, at: [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.734920] [ 1614.734920] but task is already holding lock: [ 1614.734922] (slock-AF_INET){+.-...}, at: [] tcp_v4_err+0x163/0x6b0 [ 1614.734932] [ 1614.734932] other info that might help us debug this: [ 1614.734935] Possible unsafe locking scenario: [ 1614.734935] [ 1614.734937] CPU0 [ 1614.734938] ---- [ 1614.734940] lock(slock-AF_INET); [ 1614.734943] lock(slock-AF_INET); [ 1614.734946] [ 1614.734946] *** DEADLOCK *** [ 1614.734946] [ 1614.734949] May be due to missing lock nesting notation [ 1614.734949] [ 1614.734952] 7 locks held by swapper/11/0: [ 1614.734954] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb+0x251/0xd00 [ 1614.734964] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x4c/0x4e0 [ 1614.734972] #2: (rcu_read_lock){.+.+..}, at: [] icmp_socket_deliver+0x46/0x230 [ 1614.734982] #3: (slock-AF_INET){+.-...}, at: [] tcp_v4_err+0x163/0x6b0 [ 1614.734989] #4: (rcu_read_lock){.+.+..}, at: [] ip_queue_xmit+0x0/0x680 [ 1614.734997] #5: (rcu_read_lock_bh){.+....}, at: [] ip_finish_output+0x135/0x890 [ 1614.735004] #6: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x0/0xe00 [ 1614.735012] [ 1614.735012] stack backtrace: [ 1614.735016] Pid: 0, comm: swapper/11 Not tainted 3.6.0-rc3+ #782 [ 1614.735018] Call Trace: [ 1614.735020] [] __lock_acquire+0x144c/0x1b10 [ 1614.735033] [] ? check_usage+0x9b/0x4d0 [ 1614.735037] [] ? mark_held_locks+0x82/0x130 [ 1614.735042] [] lock_acquire+0x90/0x200 [ 1614.735047] [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735051] [] ? trace_hardirqs_on+0xd/0x10 [ 1614.735060] [] _raw_spin_lock+0x41/0x50 [ 1614.735065] [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735069] [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735075] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [ 1614.735079] [] dev_hard_start_xmit+0x502/0xa70 [ 1614.735083] [] ? dev_hard_start_xmit+0x5e/0xa70 [ 1614.735087] [] ? dev_queue_xmit+0x141/0xe00 [ 1614.735093] [] sch_direct_xmit+0xfe/0x290 [ 1614.735098] [] dev_queue_xmit+0x1e5/0xe00 [ 1614.735102] [] ? dev_hard_start_xmit+0xa70/0xa70 [ 1614.735106] [] ? eth_header+0x3a/0xf0 [ 1614.735111] [] ? fib_get_table+0x2e/0x280 [ 1614.735117] [] arp_xmit+0x22/0x60 [ 1614.735121] [] arp_send+0x43/0x50 [ 1614.735125] [] arp_solicit+0x18f/0x450 [ 1614.735132] [] neigh_probe+0x4a/0x70 [ 1614.735137] [] __neigh_event_send+0xea/0x300 [ 1614.735141] [] neigh_resolve_output+0x163/0x260 [ 1614.735146] [] ip_finish_output+0x505/0x890 [ 1614.735150] [] ? ip_finish_output+0x135/0x890 [ 1614.735154] [] ip_output+0x59/0xf0 [ 1614.735158] [] ip_local_out+0x2d/0xa0 [ 1614.735162] [] ip_queue_xmit+0x1c3/0x680 [ 1614.735165] [] ? ip_local_out+0xa0/0xa0 [ 1614.735172] [] tcp_transmit_skb+0x402/0xa60 [ 1614.735177] [] tcp_retransmit_skb+0x1a1/0x620 [ 1614.735181] [] tcp_retransmit_timer+0x393/0x960 [ 1614.735185] [] ? tcp_v4_err+0x163/0x6b0 [ 1614.735189] [] tcp_v4_err+0x657/0x6b0 [ 1614.735194] [] ? icmp_socket_deliver+0x46/0x230 [ 1614.735199] [] icmp_socket_deliver+0xce/0x230 [ 1614.735203] [] ? icmp_socket_deliver+0x46/0x230 [ 1614.735208] [] icmp_unreach+0xe4/0x2c0 [ 1614.735213] [] icmp_rcv+0x350/0x4a0 [ 1614.735217] [] ip_local_deliver_finish+0x135/0x4e0 [ 1614.735221] [] ? ip_local_deliver_finish+0x4c/0x4e0 [ 1614.735225] [] ip_local_deliver+0x4a/0x90 [ 1614.735229] [] ip_rcv_finish+0x187/0x730 [ 1614.735233] [] ip_rcv+0x21d/0x300 [ 1614.735237] [] __netif_receive_skb+0x46b/0xd00 [ 1614.735241] [] ? __netif_receive_skb+0x251/0xd00 [ 1614.735245] [] process_backlog+0xb8/0x180 [ 1614.735249] [] net_rx_action+0x159/0x330 [ 1614.735257] [] __do_softirq+0xd0/0x3e0 [ 1614.735264] [] ? tick_program_event+0x24/0x30 [ 1614.735270] [] call_softirq+0x1c/0x30 [ 1614.735278] [] do_softirq+0x8d/0xc0 [ 1614.735282] [] irq_exit+0xae/0xe0 [ 1614.735287] [] smp_apic_timer_interrupt+0x6e/0x99 [ 1614.735291] [] apic_timer_interrupt+0x6c/0x80 [ 1614.735293] [] ? trace_hardirqs_off+0xd/0x10 [ 1614.735306] [] ? intel_idle+0xf5/0x150 [ 1614.735310] [] ? intel_idle+0xee/0x150 [ 1614.735317] [] cpuidle_enter+0x19/0x20 [ 1614.735321] [] cpuidle_idle_call+0xa8/0x630 [ 1614.735327] [] cpu_idle+0x8a/0xe0 [ 1614.735333] [] start_secondary+0x220/0x222 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 513cab08a986..1a9f3723c13c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1501,6 +1501,8 @@ out: return err; } +static struct lock_class_key l2tp_socket_class; + int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; @@ -1605,6 +1607,8 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); + sk->sk_allocation = GFP_ATOMIC; /* Add tunnel to our list */ -- cgit v1.2.1 From 3b59df46a449ec9975146d71318c4777ad086744 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 4 Sep 2012 00:03:29 +0000 Subject: xfrm: Workaround incompatibility of ESN and async crypto ESN for esp is defined in RFC 4303. This RFC assumes that the sequence number counters are always up to date. However, this is not true if an async crypto algorithm is employed. If the sequence number counters are not up to date on sequence number check, we may incorrectly update the upper 32 bit of the sequence number. This leads to a DOS. We workaround this by comparing the upper sequence number, (used for authentication) with the upper sequence number computed after the async processing. We drop the packet if these numbers are different. To do this, we introduce a recheck function that does this check in the ESN case. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 54a0dc2e2f8d..ab2bb42fe094 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,7 +212,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->check(x, skb, seq)) { + if (async && x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 2f6d11d04a2b..3efb07d3eb27 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -420,6 +420,18 @@ err: return -EINVAL; } +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, + .recheck = xfrm_replay_check, .notify = xfrm_replay_notify, .overflow = xfrm_replay_overflow, }; @@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = { static struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_bmp, }; @@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = { static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_esn, }; -- cgit v1.2.1 From 54a27924237eeb9767135a423dea14a0d1b5954f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 3 Sep 2012 23:58:16 +0000 Subject: sctp: use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 072bf6ae3c26..d16632e1503a 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -591,9 +591,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. */ if (chunk->tsn_gap_acked) { - list_del(&chunk->transmitted_list); - list_add_tail(&chunk->transmitted_list, - &transport->transmitted); + list_move_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } @@ -657,9 +656,8 @@ redo: /* The append was successful, so add this chunk to * the transmitted list. */ - list_del(&chunk->transmitted_list); - list_add_tail(&chunk->transmitted_list, - &transport->transmitted); + list_move_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. -- cgit v1.2.1 From 798b2cbf9227b1bd7d37ae9af4d9c750e6f4de9c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Sep 2012 14:20:14 -0400 Subject: net: Add INET dependency on aes crypto for the sake of TCP fastopen. Stephen Rothwell says: ==================== After merging the final tree, today's linux-next build (powerpc ppc44x_defconfig) failed like this: net/built-in.o: In function `tcp_fastopen_ctx_free': tcp_fastopen.c:(.text+0x5cc5c): undefined reference to `crypto_destroy_tfm' net/built-in.o: In function `tcp_fastopen_reset_cipher': (.text+0x5cccc): undefined reference to `crypto_alloc_base' net/built-in.o: In function `tcp_fastopen_reset_cipher': (.text+0x5cd6c): undefined reference to `crypto_destroy_tfm' Presumably caused by commit 104671636897 ("tcp: TCP Fast Open Server - header & support functions") from the net-next tree. I assume that some dependency on the CRYPTO infrastructure is missing. I have reverted commit 1bed966cc3bd ("Merge branch 'tcp_fastopen_server'") for today. ==================== Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 245831bec09a..30b48f523135 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -52,6 +52,8 @@ source "net/iucv/Kconfig" config INET bool "TCP/IP networking" + select CRYPTO + select CRYPTO_AES ---help--- These are the protocols used on the Internet and on most local Ethernets. It is highly recommended to say Y here (this will enlarge -- cgit v1.2.1 From 600e177920df936d03b807780ca92c662af98990 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Wed, 29 Aug 2012 10:44:29 +0000 Subject: net: Providing protocol type via system.sockprotoname xattr of /proc/PID/fd entries lsof reports some of socket descriptors as "can't identify protocol" like: [yamato@localhost]/tmp% sudo lsof | grep dbus | grep iden dbus-daem 652 dbus 6u sock ... 17812 can't identify protocol dbus-daem 652 dbus 34u sock ... 24689 can't identify protocol dbus-daem 652 dbus 42u sock ... 24739 can't identify protocol dbus-daem 652 dbus 48u sock ... 22329 can't identify protocol ... lsof cannot resolve the protocol used in a socket because procfs doesn't provide the map between inode number on sockfs and protocol type of the socket. For improving the situation this patch adds an extended attribute named 'system.sockprotoname' in which the protocol name for /proc/PID/fd/SOCKET is stored. So lsof can know the protocol for a given /proc/PID/fd/SOCKET with getxattr system call. A few weeks ago I submitted a patch for the same purpose. The patch was introduced /proc/net/sockfs which enumerates inodes and protocols of all sockets alive on a system. However, it was rejected because (1) a global lock was needed, and (2) the layout of struct socket was changed with the patch. This patch doesn't use any global lock; and doesn't change the layout of any structs. In this patch, a protocol name is stored to dentry->d_name of sockfs when new socket is associated with a file descriptor. Before this patch dentry->d_name was not used; it was just filled with empty string. lsof may use an extended attribute named 'system.sockprotoname' to retrieve the value of dentry->d_name. It is nice if we can see the protocol name with ls -l /proc/PID/fd. However, "socket:[#INODE]", the name format returned from sockfs_dname() was already defined. To keep the compatibility between kernel and user land, the extended attribute is used to prepare the value of dentry->d_name. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- net/socket.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index a5471f804d99..977c0f48c8d6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include @@ -346,7 +347,8 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags) +static int sock_alloc_file(struct socket *sock, struct file **f, int flags, + const char *dname) { struct qstr name = { .name = "" }; struct path path; @@ -357,6 +359,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; + if (dname) { + name.name = dname; + name.len = strlen(name.name); + } else if (sock->sk) { + name.name = sock->sk->sk_prot_creator->name; + name.len = strlen(name.name); + } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); @@ -389,7 +398,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags); + int fd = sock_alloc_file(sock, &newfile, flags, NULL); if (likely(fd >= 0)) fd_install(fd, newfile); @@ -455,6 +464,68 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) return NULL; } +#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" +#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) +#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) +static ssize_t sockfs_getxattr(struct dentry *dentry, + const char *name, void *value, size_t size) +{ + const char *proto_name; + size_t proto_size; + int error; + + error = -ENODATA; + if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { + proto_name = dentry->d_name.name; + proto_size = strlen(proto_name); + + if (value) { + error = -ERANGE; + if (proto_size + 1 > size) + goto out; + + strncpy(value, proto_name, proto_size + 1); + } + error = proto_size + 1; + } + +out: + return error; +} + +static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, + size_t size) +{ + ssize_t len; + ssize_t used = 0; + + len = security_inode_listsecurity(dentry->d_inode, buffer, size); + if (len < 0) + return len; + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + buffer += len; + } + + len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); + buffer += len; + } + + return used; +} + +static const struct inode_operations sockfs_inode_ops = { + .getxattr = sockfs_getxattr, + .listxattr = sockfs_listxattr, +}; + /** * sock_alloc - allocate a socket * @@ -479,6 +550,7 @@ static struct socket *sock_alloc(void) inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); + inode->i_op = &sockfs_inode_ops; this_cpu_add(sockets_in_use, 1); return sock; @@ -1394,13 +1466,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags); + fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_file(sock2, &newfile2, flags); + fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL); if (unlikely(fd2 < 0)) { err = fd2; fput(newfile1); @@ -1536,7 +1608,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags); + newfd = sock_alloc_file(newsock, &newfile, flags, + sock->sk->sk_prot_creator->name); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); -- cgit v1.2.1 From c0cc88a7627c333de50b07b7c60b1d49d9d2e6cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 15:54:55 -0400 Subject: l2tp: fix a typo in l2tp_eth_dev_recv() While investigating l2tp bug, I hit a bug in eth_type_trans(), because not enough bytes were pulled in skb head. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f9ee74deeac2..3bfb34aaee29 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -153,7 +153,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length); } - if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) + if (!pskb_may_pull(skb, ETH_HLEN)) goto error; secpath_reset(skb); -- cgit v1.2.1 From 00a9ac4c014e19fb9ceb70a28406fd962df5b8a7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 5 Sep 2012 14:34:12 +0800 Subject: cfg80211: use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). Signed-off-by: Wei Yongjun Signed-off-by: Johannes Berg --- net/wireless/reg.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2303ee73b50a..c6e0d467f8bd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1932,8 +1932,7 @@ static void restore_regulatory_settings(bool reset_user) if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) continue; - list_del(®_request->list); - list_add_tail(®_request->list, &tmp_reg_req_list); + list_move_tail(®_request->list, &tmp_reg_req_list); } } spin_unlock(®_requests_lock); @@ -1992,8 +1991,7 @@ static void restore_regulatory_settings(bool reset_user) "into the queue\n", reg_request->alpha2[0], reg_request->alpha2[1]); - list_del(®_request->list); - list_add_tail(®_request->list, ®_requests_list); + list_move_tail(®_request->list, ®_requests_list); } spin_unlock(®_requests_lock); -- cgit v1.2.1 From 00ea6deb0cb1af8df4ea899330bfd238c93ef164 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2012 15:54:51 +0200 Subject: mac80211: don't use kerneldoc for ieee80211_add_rx_radiotap_header Doing so creates warnings, but the function is internal and not part of the 802.11 docbooks, so it from kerneldoc. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b382605c5733..61c621e9273f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -103,7 +103,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, return len; } -/** +/* * ieee80211_add_rx_radiotap_header - add radiotap header * * add a radiotap header containing all the fields which the hardware provided. -- cgit v1.2.1 From 7ce8c7a3433c6d6f4adfec0611d250782f0b4b0c Mon Sep 17 00:00:00 2001 From: LEO Airwarosu Yoichi Shinoda Date: Mon, 27 Aug 2012 22:28:16 +0900 Subject: mac80211: Various small fixes for cfg.c: mpath_set_pinfo() Various small fixes for net/mac80211/cfg.c:mpath_set_pinfo(): Initialize *pinfo before filling members in, handle MESH_PATH_RESOLVED correctly, and remove bogus assignment; result in correct display of FLAGS values and meaningful EXPTIME for expired paths in iw utility. Signed-off-by: Yoichi Shinoda Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d41974aacf51..a58c0b649ba1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1378,6 +1378,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, else memset(next_hop, 0, ETH_ALEN); + memset(pinfo, 0, sizeof(*pinfo)); + pinfo->generation = mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | @@ -1396,7 +1398,6 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; - pinfo->flags = 0; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) @@ -1405,10 +1406,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; - if (mpath->flags & MESH_PATH_RESOLVING) - pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; - - pinfo->flags = mpath->flags; + if (mpath->flags & MESH_PATH_RESOLVED) + pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, -- cgit v1.2.1 From e5f5b2fb07353de00ffde49221cffad71e2fecfe Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 24 Aug 2012 14:43:31 +0200 Subject: wext: include wireless event id when it has a size problem The wext code checks is the event data is within size limits. When this check fails a message is logged with violating size. This patch adds the event id to put us on the right track for resolving that violation. Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index b0eb7aa49b60..c8717c1d082e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -478,13 +478,13 @@ void wireless_send_event(struct net_device * dev, if (descr->header_type == IW_HEADER_TYPE_POINT) { /* Check if number of token fits within bounds */ if (wrqu->data.length > descr->max_tokens) { - netdev_err(dev, "(WE) : Wireless Event too big (%d)\n", - wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too big (%d)\n", + cmd, wrqu->data.length); return; } if (wrqu->data.length < descr->min_tokens) { - netdev_err(dev, "(WE) : Wireless Event too small (%d)\n", - wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too small (%d)\n", + cmd, wrqu->data.length); return; } /* Calculate extra_len - extra is NULL for restricted events */ -- cgit v1.2.1 From 768be59f30f2ee8958ba3718101162c694efab12 Mon Sep 17 00:00:00 2001 From: Hila Gonen Date: Sun, 26 Aug 2012 11:00:28 +0300 Subject: cfg80211: fix indentation checkpatch pointed out an issue, fix it. Signed-off-by: Hila Gonen Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 848523a2b22f..9730c9862bdc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -815,7 +815,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (signal < 0 || signal > 100))) + (signal < 0 || signal > 100))) return NULL; if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) -- cgit v1.2.1 From 00545bec9412d130c77f72a08d6c8b6ad21d4a1e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 5 Sep 2012 18:24:55 +0200 Subject: netfilter: fix crash during boot if NAT has been compiled built-in (c7232c9 netfilter: add protocol independent NAT core) introduced a problem that leads to crashing during boot due to NULL pointer dereference. It seems that xt_nat calls xt_register_target() before xt_init(): net/netfilter/x_tables.c:static struct xt_af *xt; is NULL and we crash on xt_register_target(struct xt_target *target) { u_int8_t af = target->family; int ret; ret = mutex_lock_interruptible(&xt[af].mutex); ... Fix this by changing the linking order, to make sure that x_tables comes before xt_nat. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 98244d4c75f2..0baa3f104fcb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -47,7 +47,6 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o obj-$(CONFIG_NF_NAT) += nf_nat.o -obj-$(CONFIG_NF_NAT) += xt_nat.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o @@ -71,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o +obj-$(CONFIG_NF_NAT) += xt_nat.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o -- cgit v1.2.1 From d23ff701643a4a725e2c7a8ba2d567d39daa29ea Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 4 Sep 2012 11:03:15 +0000 Subject: tcp: add generic netlink support for tcp_metrics Add support for genl "tcp_metrics". No locking is changed, only that now we can unlink and delete entries after grace period. We implement get/del for single entry and dump to support show/flush filtering in user space. Del without address attribute causes flush for all addresses, sadly under genl_mutex. v2: - remove rcu_assign_pointer as suggested by Eric Dumazet, it is not needed because there are no other writes under lock - move the flushing code in tcp_metrics_flush_all v3: - remove synchronize_rcu on flush as suggested by Eric Dumazet Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 341 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0abe67bb4d3a..988edb63ee73 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -17,20 +18,10 @@ #include #include #include +#include int sysctl_tcp_nometrics_save __read_mostly; -enum tcp_metric_index { - TCP_METRIC_RTT, - TCP_METRIC_RTTVAR, - TCP_METRIC_SSTHRESH, - TCP_METRIC_CWND, - TCP_METRIC_REORDERING, - - /* Always last. */ - TCP_METRIC_MAX, -}; - struct tcp_fastopen_metrics { u16 mss; u16 syn_loss:10; /* Recurring Fast Open SYN losses */ @@ -45,8 +36,10 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX]; + u32 tcpm_vals[TCP_METRIC_MAX + 1]; struct tcp_fastopen_metrics tcpm_fastopen; + + struct rcu_head rcu_head; }; static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } +static struct genl_family tcp_metrics_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, +}; + +static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { + [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr), }, + /* Following attributes are not received for GET/DEL, + * we keep them for reference + */ +#if 0 + [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, }, + [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, }, + [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY, + .len = TCP_FASTOPEN_COOKIE_MAX, }, +#endif +}; + +/* Add attributes, caller cancels its header on failure */ +static int tcp_metrics_fill_info(struct sk_buff *msg, + struct tcp_metrics_block *tm) +{ + struct nlattr *nest; + int i; + + switch (tm->tcpm_addr.family) { + case AF_INET: + if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_addr.addr.a4) < 0) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, + tm->tcpm_addr.addr.a6) < 0) + goto nla_put_failure; + break; + default: + return -EAFNOSUPPORT; + } + + if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, + jiffies - tm->tcpm_stamp) < 0) + goto nla_put_failure; + if (tm->tcpm_ts_stamp) { + if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, + (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) + goto nla_put_failure; + if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, + tm->tcpm_ts) < 0) + goto nla_put_failure; + } + + { + int n = 0; + + nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); + if (!nest) + goto nla_put_failure; + for (i = 0; i < TCP_METRIC_MAX + 1; i++) { + if (!tm->tcpm_vals[i]) + continue; + if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + goto nla_put_failure; + n++; + } + if (n) + nla_nest_end(msg, nest); + else + nla_nest_cancel(msg, nest); + } + + { + struct tcp_fastopen_metrics tfom_copy[1], *tfom; + unsigned int seq; + + do { + seq = read_seqbegin(&fastopen_seqlock); + tfom_copy[0] = tm->tcpm_fastopen; + } while (read_seqretry(&fastopen_seqlock, seq)); + + tfom = tfom_copy; + if (tfom->mss && + nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS, + tfom->mss) < 0) + goto nla_put_failure; + if (tfom->syn_loss && + (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, + tfom->syn_loss) < 0 || + nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, + jiffies - tfom->last_syn_loss) < 0)) + goto nla_put_failure; + if (tfom->cookie.len > 0 && + nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, + tfom->cookie.len, tfom->cookie.val) < 0) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int tcp_metrics_dump_info(struct sk_buff *skb, + struct netlink_callback *cb, + struct tcp_metrics_block *tm) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &tcp_metrics_nl_family, NLM_F_MULTI, + TCP_METRICS_CMD_GET); + if (!hdr) + return -EMSGSIZE; + + if (tcp_metrics_fill_info(skb, tm) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int tcp_metrics_nl_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int row, s_row = cb->args[0]; + int s_col = cb->args[1], col = s_col; + + for (row = s_row; row < max_rows; row++, s_col = 0) { + struct tcp_metrics_block *tm; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + + rcu_read_lock(); + for (col = 0, tm = rcu_dereference(hb->chain); tm; + tm = rcu_dereference(tm->tcpm_next), col++) { + if (col < s_col) + continue; + if (tcp_metrics_dump_info(skb, cb, tm) < 0) { + rcu_read_unlock(); + goto done; + } + } + rcu_read_unlock(); + } + +done: + cb->args[0] = row; + cb->args[1] = col; + return skb->len; +} + +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + struct nlattr *a; + + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + if (a) { + addr->family = AF_INET; + addr->addr.a4 = nla_get_be32(a); + *hash = (__force unsigned int) addr->addr.a4; + return 0; + } + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + if (a) { + if (nla_len(a) != sizeof(sizeof(struct in6_addr))) + return -EINVAL; + addr->family = AF_INET6; + memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + return 0; + } + return optional ? 1 : -EAFNOSUPPORT; +} + +static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct sk_buff *msg; + struct net *net = genl_info_net(info); + void *reply; + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 0); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0, + info->genlhdr->cmd); + if (!reply) + goto nla_put_failure; + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + ret = -ESRCH; + rcu_read_lock(); + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + ret = tcp_metrics_fill_info(msg, tm); + break; + } + } + rcu_read_unlock(); + if (ret < 0) + goto out_free; + + genlmsg_end(msg, reply); + return genlmsg_reply(msg, info); + +nla_put_failure: + ret = -EMSGSIZE; + +out_free: + nlmsg_free(msg); + return ret; +} + +#define deref_locked_genl(p) \ + rcu_dereference_protected(p, lockdep_genl_is_held() && \ + lockdep_is_held(&tcp_metrics_lock)) + +#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) + +static int tcp_metrics_flush_all(struct net *net) +{ + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + struct tcp_metrics_block *tm; + unsigned int row; + + for (row = 0; row < max_rows; row++, hb++) { + spin_lock_bh(&tcp_metrics_lock); + tm = deref_locked_genl(hb->chain); + if (tm) + hb->chain = NULL; + spin_unlock_bh(&tcp_metrics_lock); + while (tm) { + struct tcp_metrics_block *next; + + next = deref_genl(tm->tcpm_next); + kfree_rcu(tm, rcu_head); + tm = next; + } + } + return 0; +} + +static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct tcpm_hash_bucket *hb; + struct tcp_metrics_block *tm; + struct tcp_metrics_block __rcu **pp; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net = genl_info_net(info); + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 1); + if (ret < 0) + return ret; + if (ret > 0) + return tcp_metrics_flush_all(net); + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hb = net->ipv4.tcp_metrics_hash + hash; + pp = &hb->chain; + spin_lock_bh(&tcp_metrics_lock); + for (tm = deref_locked_genl(*pp); tm; + pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + *pp = tm->tcpm_next; + break; + } + } + spin_unlock_bh(&tcp_metrics_lock); + if (!tm) + return -ESRCH; + kfree_rcu(tm, rcu_head); + return 0; +} + +static struct genl_ops tcp_metrics_nl_ops[] = { + { + .cmd = TCP_METRICS_CMD_GET, + .doit = tcp_metrics_nl_cmd_get, + .dumpit = tcp_metrics_nl_dump, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TCP_METRICS_CMD_DEL, + .doit = tcp_metrics_nl_cmd_del, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -753,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = { void __init tcp_metrics_init(void) { - register_pernet_subsys(&tcp_net_metrics_ops); + int ret; + + ret = register_pernet_subsys(&tcp_net_metrics_ops); + if (ret < 0) + goto cleanup; + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, + tcp_metrics_nl_ops, + ARRAY_SIZE(tcp_metrics_nl_ops)); + if (ret < 0) + goto cleanup_subsys; + return; + +cleanup_subsys: + unregister_pernet_subsys(&tcp_net_metrics_ops); + +cleanup: + return; } -- cgit v1.2.1 From f21ec3d2d46e5f2ffc06f31fe2704fdcea7a58f3 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 22 Aug 2012 22:13:36 -0400 Subject: serial: add a new helper function In most of the time, the driver needs to check if the cts flow control is enabled. But now, the driver checks the ASYNC_CTS_FLOW flag manually, which is not a grace way. So add a new wraper function to make the code tidy and clean. Signed-off-by: Huang Shijie Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 4 ++-- net/irda/ircomm/ircomm_tty_attach.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 96689906b93c..95a3a7a336ba 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -1070,7 +1070,7 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) goto put; } } - if (tty && self->port.flags & ASYNC_CTS_FLOW) { + if (tty && tty_port_cts_enabled(&self->port)) { if (tty->hw_stopped) { if (status & IRCOMM_CTS) { IRDA_DEBUG(2, @@ -1313,7 +1313,7 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) seq_puts(m, "Flags:"); sep = ' '; - if (self->port.flags & ASYNC_CTS_FLOW) { + if (tty_port_cts_enabled(&self->port)) { seq_printf(m, "%cASYNC_CTS_FLOW", sep); sep = '|'; } diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 3ab70e7a0715..edab393e0c82 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -578,7 +578,7 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) * will have to wait for the peer device (DCE) to raise the CTS * line. */ - if ((self->port.flags & ASYNC_CTS_FLOW) && + if (tty_port_cts_enabled(&self->port) && ((self->settings.dce & IRCOMM_CTS) == 0)) { IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __func__ ); goto put; -- cgit v1.2.1 From d013ef2aba8fe765ca683598e404203215632373 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 5 Sep 2012 10:53:18 +0000 Subject: tcp: fix possible socket refcount problem for ipv6 commit 144d56e91044181ec0ef67aeca91e9a8b5718348 ("tcp: fix possible socket refcount problem") is missing the IPv6 part. As tcp_release_cb is shared by both protocols we should hold sock reference for the TCP_MTU_REDUCED_DEFERRED bit. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a3e60cc04a8a..acd32e3f1b68 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -403,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, + &tp->tsq_flags)) + sock_hold(sk); goto out; } -- cgit v1.2.1 From c6c13965f4ffca6163af0c9419095aedc103648c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Sep 2012 04:11:28 +0000 Subject: net: add unknown state to sysfs NIC duplex export Currently when the NIC duplex state is DUPLEX_UNKNOWN it is exported as full through sysfs, this patch adds support for DUPLEX_UNKNOWN. It is handled the same way as in ethtool. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 72607174ea5a..bcf02f608cbf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -166,9 +166,21 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, "%s\n", - cmd.duplex ? "full" : "half"); + if (!__ethtool_get_settings(netdev, &cmd)) { + const char *duplex; + switch (cmd.duplex) { + case DUPLEX_HALF: + duplex = "half"; + break; + case DUPLEX_FULL: + duplex = "full"; + break; + default: + duplex = "unknown"; + break; + } + ret = sprintf(buf, "%s\n", duplex); + } } rtnl_unlock(); return ret; -- cgit v1.2.1 From 2c9693222952b08c224093a1263cb5aee02d10ff Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 5 Sep 2012 15:34:58 +1000 Subject: netfilter: ipv6: using csum_ipv6_magic requires net/ip6_checksum.h Fixes this build error: net/ipv6/netfilter/nf_nat_l3proto_ipv6.c: In function 'nf_nat_ipv6_csum_recalc': net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:144:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration] Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 81a2d1c3da8e..abfe75a2e316 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.1 From 23d3b8bfb8eb20e7d96afa09991e6a5ed1c83164 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Sep 2012 01:02:56 +0000 Subject: net: qdisc busylock needs lockdep annotations It seems we need to provide ability for stacked devices to use specific lock_class_key for sch->busylock We could instead default l2tpeth tx_queue_len to 0 (no qdisc), but a user might use a qdisc anyway. (So same fixes are probably needed on non LLTX stacked drivers) Noticed while stressing L2TPV3 setup : ====================================================== [ INFO: possible circular locking dependency detected ] 3.6.0-rc3+ #788 Not tainted ------------------------------------------------------- netperf/4660 is trying to acquire lock: (l2tpsock){+.-...}, at: [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] but task is already holding lock: (&(&sch->busylock)->rlock){+.-...}, at: [] dev_queue_xmit+0xd75/0xe00 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&sch->busylock)->rlock){+.-...}: [] lock_acquire+0x90/0x200 [] _raw_spin_lock_irqsave+0x4c/0x60 [] __wake_up+0x32/0x70 [] tty_wakeup+0x3e/0x80 [] pty_write+0x73/0x80 [] tty_put_char+0x3c/0x40 [] process_echoes+0x142/0x330 [] n_tty_receive_buf+0x8fb/0x1230 [] flush_to_ldisc+0x142/0x1c0 [] process_one_work+0x198/0x760 [] worker_thread+0x186/0x4b0 [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 -> #0 (l2tpsock){+.-...}: [] __lock_acquire+0x1628/0x1b10 [] lock_acquire+0x90/0x200 [] _raw_spin_lock+0x41/0x50 [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [] dev_hard_start_xmit+0x502/0xa70 [] sch_direct_xmit+0xfe/0x290 [] dev_queue_xmit+0x1e5/0xe00 [] ip_finish_output+0x3d0/0x890 [] ip_output+0x59/0xf0 [] ip_local_out+0x2d/0xa0 [] ip_queue_xmit+0x1c3/0x680 [] tcp_transmit_skb+0x402/0xa60 [] tcp_write_xmit+0x1f4/0xa30 [] tcp_push_one+0x30/0x40 [] tcp_sendmsg+0xe82/0x1040 [] inet_sendmsg+0x125/0x230 [] sock_sendmsg+0xdc/0xf0 [] sys_sendto+0xfe/0x130 [] system_call_fastpath+0x16/0x1b Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&sch->busylock)->rlock); lock(l2tpsock); lock(&(&sch->busylock)->rlock); lock(l2tpsock); *** DEADLOCK *** 5 locks held by netperf/4660: #0: (sk_lock-AF_INET){+.+.+.}, at: [] tcp_sendmsg+0x2c/0x1040 #1: (rcu_read_lock){.+.+..}, at: [] ip_queue_xmit+0x0/0x680 #2: (rcu_read_lock_bh){.+....}, at: [] ip_finish_output+0x135/0x890 #3: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x0/0xe00 #4: (&(&sch->busylock)->rlock){+.-...}, at: [] dev_queue_xmit+0xd75/0xe00 stack backtrace: Pid: 4660, comm: netperf Not tainted 3.6.0-rc3+ #788 Call Trace: [] print_circular_bug+0x1fb/0x20c [] __lock_acquire+0x1628/0x1b10 [] ? check_usage+0x9b/0x4d0 [] ? __lock_acquire+0x2e4/0x1b10 [] lock_acquire+0x90/0x200 [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] _raw_spin_lock+0x41/0x50 [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [] dev_hard_start_xmit+0x502/0xa70 [] ? dev_hard_start_xmit+0x5e/0xa70 [] ? dev_queue_xmit+0x141/0xe00 [] sch_direct_xmit+0xfe/0x290 [] dev_queue_xmit+0x1e5/0xe00 [] ? dev_hard_start_xmit+0xa70/0xa70 [] ip_finish_output+0x3d0/0x890 [] ? ip_finish_output+0x135/0x890 [] ip_output+0x59/0xf0 [] ip_local_out+0x2d/0xa0 [] ip_queue_xmit+0x1c3/0x680 [] ? ip_local_out+0xa0/0xa0 [] tcp_transmit_skb+0x402/0xa60 [] ? tcp_md5_do_lookup+0x18e/0x1a0 [] tcp_write_xmit+0x1f4/0xa30 [] tcp_push_one+0x30/0x40 [] tcp_sendmsg+0xe82/0x1040 [] inet_sendmsg+0x125/0x230 [] ? inet_create+0x6b0/0x6b0 [] ? sock_update_classid+0xc2/0x3b0 [] ? sock_update_classid+0x130/0x3b0 [] sock_sendmsg+0xdc/0xf0 [] ? fget_light+0x3f9/0x4f0 [] sys_sendto+0xfe/0x130 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irq+0x30/0x50 [] ? finish_task_switch+0x83/0xf0 [] ? finish_task_switch+0x46/0xf0 [] ? sysret_check+0x1b/0x56 [] system_call_fastpath+0x16/0x1b Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 3 ++- net/sched/sch_generic.c | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f9ee74deeac2..ba89997bcd2e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -67,6 +67,7 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) return net_generic(net, l2tp_eth_net_id); } +static struct lock_class_key l2tp_eth_tx_busylock; static int l2tp_eth_dev_init(struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); @@ -74,7 +75,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) priv->dev = dev; eth_hw_addr_random(dev); memset(&dev->broadcast[0], 0xff, 6); - + dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c4d5fe53ce8..aefc1504dc88 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -527,6 +527,8 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); +static struct lock_class_key qdisc_tx_busylock; + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops) { @@ -534,6 +536,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc *sch; unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; + struct net_device *dev = dev_queue->dev; p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); @@ -553,12 +556,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold(qdisc_dev(sch)); + dev_hold(dev); atomic_set(&sch->refcnt, 1); return sch; -- cgit v1.2.1 From ef2c7d7b59708d54213c7556a82d14de9a7e4475 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 5 Sep 2012 02:12:42 +0000 Subject: ipv6: fix handling of blackhole and prohibit routes When adding a blackhole or a prohibit route, they were handling like classic routes. Moreover, it was only possible to add this kind of routes by specifying an interface. Bug already reported here: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=498498 Before the patch: $ ip route add blackhole 2001::1/128 RTNETLINK answers: No such device $ ip route add blackhole 2001::1/128 dev eth0 $ ip -6 route | grep 2001 2001::1 dev eth0 metric 1024 After: $ ip route add blackhole 2001::1/128 $ ip -6 route | grep 2001 blackhole 2001::1 dev lo metric 1024 error -22 v2: wrong patch v3: add a field fc_type in struct fib6_config to store RTN_* type Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ddf2d132e7f..fa264447a751 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1463,8 +1463,18 @@ int ip6_route_add(struct fib6_config *cfg) } rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; - rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; + switch (cfg->fc_type) { + case RTN_BLACKHOLE: + rt->dst.error = -EINVAL; + break; + case RTN_PROHIBIT: + rt->dst.error = -EACCES; + break; + default: + rt->dst.error = -ENETUNREACH; + break; + } goto install_route; } @@ -2261,8 +2271,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_src_len = rtm->rtm_src_len; cfg->fc_flags = RTF_UP; cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_type = rtm->rtm_type; - if (rtm->rtm_type == RTN_UNREACHABLE) + if (rtm->rtm_type == RTN_UNREACHABLE || + rtm->rtm_type == RTN_BLACKHOLE || + rtm->rtm_type == RTN_PROHIBIT) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) @@ -2391,8 +2404,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) - rtm->rtm_type = RTN_UNREACHABLE; + if (rt->rt6i_flags & RTF_REJECT) { + switch (rt->dst.error) { + case -EINVAL: + rtm->rtm_type = RTN_BLACKHOLE; + break; + case -EACCES: + rtm->rtm_type = RTN_PROHIBIT; + break; + default: + rtm->rtm_type = RTN_UNREACHABLE; + break; + } + } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) -- cgit v1.2.1 From ed6fe9d614fc1bca95eb8c0ccd0e92db00ef9d5d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 1 Sep 2012 12:34:07 -0400 Subject: Fix order of arguments to compat_put_time[spec|val] Commit 644595f89620 ("compat: Handle COMPAT_USE_64BIT_TIME in net/socket.c") introduced a bug where the helper functions to take either a 64-bit or compat time[spec|val] got the arguments in the wrong order, passing the kernel stack pointer off as a user pointer (and vice versa). Because of the user address range check, that in turn then causes an EFAULT due to the user pointer range checking failing for the kernel address. Incorrectly resuling in a failed system call for 32-bit processes with a 64-bit kernel. On odder architectures like HP-PA (with separate user/kernel address spaces), it can be used read kernel memory. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index a5471f804d99..edc3c4af9085 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2604,7 +2604,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) - err = compat_put_timeval(up, &ktv); + err = compat_put_timeval(&ktv, up); return err; } @@ -2620,7 +2620,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) - err = compat_put_timespec(up, &kts); + err = compat_put_timespec(&kts, up); return err; } -- cgit v1.2.1 From a4ed53466a289a4139405bf7190c78a8e9d8debf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 6 Sep 2012 13:20:53 +0800 Subject: mac80211: use list_move instead of list_del/list_add Using list_move() instead of list_del() + list_add(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 507121dad082..83608ac16780 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -233,8 +233,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work) u32 dur = dep->duration; dep->duration = dur - roc->duration; roc->duration = dur; - list_del(&dep->list); - list_add(&dep->list, &roc->list); + list_move(&dep->list, &roc->list); } } out_unlock: -- cgit v1.2.1 From 0626af3139572610b56376580d11eb65d45d9dd7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 07:49:03 +0000 Subject: netfilter: take care of timewait sockets Sami Farin reported crashes in xt_LOG because it assumes skb->sk is a full blown socket. Since (41063e9 ipv4: Early TCP socket demux), we can have skb->sk pointing to a timewait socket. Same fix is needed in nfnetlink_log. Diagnosed-by: Florian Westphal Reported-by: Sami Farin Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 14 ++++++++------ net/netfilter/xt_LOG.c | 33 +++++++++++++++++---------------- 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 14e2f3903142..5cfb5bedb2b8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -381,6 +381,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; + struct sock *sk; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -499,18 +500,19 @@ __build_packet_message(struct nfulnl_instance *inst, } /* UID */ - if (skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - struct file *file = skb->sk->sk_socket->file; + sk = skb->sk; + if (sk && sk->sk_state != TCP_TIME_WAIT) { + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + struct file *file = sk->sk_socket->file; __be32 uid = htonl(file->f_cred->fsuid); __be32 gid = htonl(file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; } else - read_unlock_bh(&skb->sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* local sequence number */ diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index ff5f75fddb15..2a4f9693e799 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -145,6 +145,19 @@ static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, return 0; } +static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk) +{ + if (!sk || sk->sk_state == TCP_TIME_WAIT) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + sk->sk_socket->file->f_cred->fsuid, + sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&sk->sk_callback_lock); +} + /* One level of recursion won't kill us */ static void dump_ipv4_packet(struct sbuff *m, const struct nf_loginfo *info, @@ -361,14 +374,8 @@ static void dump_ipv4_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && !iphoff) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) @@ -717,14 +724,8 @@ static void dump_ipv6_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && recurse) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!recurse && skb->mark) -- cgit v1.2.1 From 30dd3edf97abda301150c8cf26fed21e53e3a9ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Sep 2012 19:15:01 +0200 Subject: mac80211: don't hang on to sched_scan_ies There's no need to keep a copy of the scheduled scan IEs after the driver has been told, if it requires a copy it must make one. Therefore, we can move sched_scan_ies into the function. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/scan.c | 39 ++++++++++++++------------------------- 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 204bfedba306..0a983fbd743d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -970,7 +970,6 @@ struct ieee80211_local { int scan_channel_idx; int scan_ies_len; - struct ieee80211_sched_scan_ies sched_scan_ies; struct work_struct sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 740e414d44f4..e9b19294dd45 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -917,6 +917,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sched_scan_ies sched_scan_ies; int ret, i; mutex_lock(&local->mtx); @@ -935,33 +936,28 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, if (!local->hw.wiphy->bands[i]) continue; - local->sched_scan_ies.ie[i] = kzalloc(2 + - IEEE80211_MAX_SSID_LEN + - local->scan_ies_len + - req->ie_len, - GFP_KERNEL); - if (!local->sched_scan_ies.ie[i]) { + sched_scan_ies.ie[i] = kzalloc(2 + IEEE80211_MAX_SSID_LEN + + local->scan_ies_len + + req->ie_len, + GFP_KERNEL); + if (!sched_scan_ies.ie[i]) { ret = -ENOMEM; goto out_free; } - local->sched_scan_ies.len[i] = - ieee80211_build_preq_ies(local, - local->sched_scan_ies.ie[i], + sched_scan_ies.len[i] = + ieee80211_build_preq_ies(local, sched_scan_ies.ie[i], req->ie, req->ie_len, i, (u32) -1, 0); } - ret = drv_sched_scan_start(local, sdata, req, - &local->sched_scan_ies); - if (ret == 0) { + ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); + if (ret == 0) rcu_assign_pointer(local->sched_scan_sdata, sdata); - goto out; - } out_free: while (i > 0) - kfree(local->sched_scan_ies.ie[--i]); + kfree(sched_scan_ies.ie[--i]); out: mutex_unlock(&local->mtx); return ret; @@ -970,7 +966,7 @@ out: int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - int ret = 0, i; + int ret = 0; mutex_lock(&local->mtx); @@ -979,12 +975,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) goto out; } - if (rcu_access_pointer(local->sched_scan_sdata)) { - for (i = 0; i < IEEE80211_NUM_BANDS; i++) - kfree(local->sched_scan_ies.ie[i]); - + if (rcu_access_pointer(local->sched_scan_sdata)) drv_sched_scan_stop(local, sdata); - } + out: mutex_unlock(&local->mtx); @@ -1006,7 +999,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, sched_scan_stopped_work); - int i; mutex_lock(&local->mtx); @@ -1015,9 +1007,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) return; } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) - kfree(local->sched_scan_ies.ie[i]); - rcu_assign_pointer(local->sched_scan_sdata, NULL); mutex_unlock(&local->mtx); -- cgit v1.2.1 From 882a7c69d3c605bfacf32e19033447dc70204d45 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 22:32:45 +0200 Subject: mac80211: disconnect if channel switch fails Disconnect from the AP if channel switching in the driver failed or if the new channel is unavailable. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 55 +++++++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0a983fbd743d..e2ab03c773e3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -411,6 +411,7 @@ struct ieee80211_if_managed { struct work_struct monitor_work; struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; + struct work_struct csa_connection_drop_work; unsigned long beacon_timeout; unsigned long probe_timeout; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d77650d4363..6e374cb04af6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -730,16 +730,13 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) trace_api_chswitch_done(sdata, success); if (!success) { - /* - * If the channel switch was not successful, stay - * around on the old channel. We currently lack - * good handling of this situation, possibly we - * should just drop the association. - */ - sdata->local->csa_channel = sdata->local->oper_channel; + sdata_info(sdata, + "driver channel switch failed, disconnecting\n"); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); + } else { + ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } - - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } EXPORT_SYMBOL(ieee80211_chswitch_done); @@ -784,8 +781,14 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); - if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) + if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { + sdata_info(sdata, + "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", + ifmgd->associated->bssid, new_freq); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); return; + } sdata->local->csa_channel = new_ch; @@ -1692,7 +1695,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ap_probereq_get); -static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, + bool transmit_frame) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; @@ -1704,12 +1708,10 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) return; } - sdata_info(sdata, "Connection to AP %pM lost\n", - ifmgd->associated->bssid); - ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - false, frame_buf); + transmit_frame, frame_buf); + ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; mutex_unlock(&ifmgd->mtx); /* @@ -1739,10 +1741,24 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) rcu_read_unlock(); } - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - __ieee80211_connection_loss(sdata); - else + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) { + sdata_info(sdata, "Connection to AP %pM lost\n", + ifmgd->bssid); + __ieee80211_disconnect(sdata, false); + } else { ieee80211_mgd_probe_ap(sdata, true); + } +} + +static void ieee80211_csa_connection_drop_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.csa_connection_drop_work); + + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + __ieee80211_disconnect(sdata, true); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) @@ -2929,6 +2945,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->monitor_work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); + cancel_work_sync(&ifmgd->csa_connection_drop_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -2985,6 +3002,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->csa_connection_drop_work, + ieee80211_csa_connection_drop_work); INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); -- cgit v1.2.1 From 761a48d2603c0ff48024bc70c129b00ec37639ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2012 13:07:00 +0200 Subject: mac80211: check power constraint IE size when parsing The power constraint IE is always a single byte so check the size when parsing instead of later. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/mlme.c | 10 ++-------- net/mac80211/util.c | 5 ++++- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e2ab03c773e3..b95fa256d438 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1165,7 +1165,6 @@ struct ieee802_11_elems { u8 prep_len; u8 perr_len; u8 country_elem_len; - u8 pwr_constr_elem_len; u8 quiet_elem_len; u8 num_of_quiet_elem; /* can be more the one */ u8 timeout_int_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6e374cb04af6..87466942fa82 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -821,18 +821,13 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, - u16 capab_info, u8 *pwr_constr_elem, - u8 pwr_constr_elem_len) + u16 capab_info, u8 *pwr_constr_elem) { struct ieee80211_conf *conf = &sdata->local->hw.conf; if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT)) return; - /* Power constraint IE length should be 1 octet */ - if (pwr_constr_elem_len != 1) - return; - if ((*pwr_constr_elem <= conf->channel->max_reg_power) && (*pwr_constr_elem != sdata->local->power_constr_level)) { sdata->local->power_constr_level = *pwr_constr_elem; @@ -2552,8 +2547,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (elems.pwr_constr_elem) ieee80211_handle_pwr_constr(sdata, le16_to_cpu(mgmt->u.probe_resp.capab_info), - elems.pwr_constr_elem, - elems.pwr_constr_elem_len); + elems.pwr_constr_elem); } ieee80211_bss_info_change_notify(sdata, changed); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 471fb0516c99..ed7543960b16 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -792,8 +792,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->country_elem_len = elen; break; case WLAN_EID_PWR_CONSTRAINT: + if (elen != 1) { + elem_parse_failed = true; + break; + } elems->pwr_constr_elem = pos; - elems->pwr_constr_elem_len = elen; break; case WLAN_EID_TIMEOUT_INTERVAL: elems->timeout_int = pos; -- cgit v1.2.1 From 964b19f9770cd8d299fa99c84b0ff2e90df9c523 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 5 Sep 2012 20:23:56 +0300 Subject: mac80211: use synchronize_net() on key destroying __ieee80211_key_destroy() calls synchronize_rcu() in order to sync the tx path before destroying the key. However, synching the tx path can be done with synchronize_net() as well, which is usually faster (the timing might be important for roaming scenarios). Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 7ae678ba5d67..d27e61aaa71b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -402,7 +402,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) * Synchronize so the TX path can no longer be using * this key before we free/remove it. */ - synchronize_rcu(); + synchronize_net(); if (key->local) ieee80211_key_disable_hw_accel(key); -- cgit v1.2.1 From 23a85b45cfe954ba418f68352351f21cc28decfd Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 26 Jun 2012 14:37:21 +0200 Subject: mac80211: refactor set_channel_type Split functionality for further reuse. Will prevent code duplication when channel context channel_type merging is introduced. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 67 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f0f87e5a1d35..0bfc914ddd15 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -68,16 +68,14 @@ ieee80211_get_channel_mode(struct ieee80211_local *local, return mode; } -bool ieee80211_set_channel_type(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - enum nl80211_channel_type chantype) +static enum nl80211_channel_type +ieee80211_get_superchan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *tmp; enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT; - bool result; + struct ieee80211_sub_if_data *tmp; mutex_lock(&local->iflist_mtx); - list_for_each_entry(tmp, &local->interfaces, list) { if (tmp == sdata) continue; @@ -103,39 +101,70 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local, break; } } + mutex_unlock(&local->iflist_mtx); - switch (superchan) { + return superchan; +} + +static bool +ieee80211_channel_types_are_compatible(enum nl80211_channel_type chantype1, + enum nl80211_channel_type chantype2, + enum nl80211_channel_type *compat) +{ + /* + * start out with chantype1 being the result, + * overwriting later if needed + */ + if (compat) + *compat = chantype1; + + switch (chantype1) { case NL80211_CHAN_NO_HT: + if (compat) + *compat = chantype2; + break; case NL80211_CHAN_HT20: /* * allow any change that doesn't go to no-HT * (if it already is no-HT no change is needed) */ - if (chantype == NL80211_CHAN_NO_HT) + if (chantype2 == NL80211_CHAN_NO_HT) break; - superchan = chantype; + if (compat) + *compat = chantype2; break; case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: /* allow smaller bandwidth and same */ - if (chantype == NL80211_CHAN_NO_HT) + if (chantype2 == NL80211_CHAN_NO_HT) break; - if (chantype == NL80211_CHAN_HT20) + if (chantype2 == NL80211_CHAN_HT20) break; - if (superchan == chantype) + if (chantype2 == chantype1) break; - result = false; - goto out; + return false; } - local->_oper_channel_type = superchan; + return true; +} + +bool ieee80211_set_channel_type(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_channel_type chantype) +{ + enum nl80211_channel_type superchan; + enum nl80211_channel_type compatchan; + + superchan = ieee80211_get_superchan(local, sdata); + if (!ieee80211_channel_types_are_compatible(superchan, chantype, + &compatchan)) + return false; + + local->_oper_channel_type = compatchan; if (sdata) sdata->vif.bss_conf.channel_type = chantype; - result = true; - out: - mutex_unlock(&local->iflist_mtx); + return true; - return result; } -- cgit v1.2.1 From 7ab4551f3b391818e29263279031dca1e26417c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Sep 2012 08:07:13 +0000 Subject: tcp: fix TFO regression Fengguang Wu reported various panics and bisected to commit 8336886f786fdac (tcp: TCP Fast Open Server - support TFO listeners) Fix this by making sure socket is a TCP socket before accessing TFO data structures. [ 233.046014] kfree_debugcheck: out of range ptr ea6000000bb8h. [ 233.047399] ------------[ cut here ]------------ [ 233.048393] kernel BUG at /c/kernel-tests/src/stable/mm/slab.c:3074! [ 233.048393] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC [ 233.048393] Modules linked in: [ 233.048393] CPU 0 [ 233.048393] Pid: 3929, comm: trinity-watchdo Not tainted 3.6.0-rc3+ #4192 Bochs Bochs [ 233.048393] RIP: 0010:[] [] kfree_debugcheck+0x27/0x2d [ 233.048393] RSP: 0018:ffff88000facbca8 EFLAGS: 00010092 [ 233.048393] RAX: 0000000000000031 RBX: 0000ea6000000bb8 RCX: 00000000a189a188 [ 233.048393] RDX: 000000000000a189 RSI: ffffffff8108ad32 RDI: ffffffff810d30f9 [ 233.048393] RBP: ffff88000facbcb8 R08: 0000000000000002 R09: ffffffff843846f0 [ 233.048393] R10: ffffffff810ae37c R11: 0000000000000908 R12: 0000000000000202 [ 233.048393] R13: ffffffff823dbd5a R14: ffff88000ec5bea8 R15: ffffffff8363c780 [ 233.048393] FS: 00007faa6899c700(0000) GS:ffff88001f200000(0000) knlGS:0000000000000000 [ 233.048393] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 233.048393] CR2: 00007faa6841019c CR3: 0000000012c82000 CR4: 00000000000006f0 [ 233.048393] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 233.048393] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 233.048393] Process trinity-watchdo (pid: 3929, threadinfo ffff88000faca000, task ffff88000faec600) [ 233.048393] Stack: [ 233.048393] 0000000000000000 0000ea6000000bb8 ffff88000facbce8 ffffffff8116ad81 [ 233.048393] ffff88000ff588a0 ffff88000ff58850 ffff88000ff588a0 0000000000000000 [ 233.048393] ffff88000facbd08 ffffffff823dbd5a ffffffff823dbcb0 ffff88000ff58850 [ 233.048393] Call Trace: [ 233.048393] [] kfree+0x5f/0xca [ 233.048393] [] inet_sock_destruct+0xaa/0x13c [ 233.048393] [] ? inet_sk_rebuild_header +0x319/0x319 [ 233.048393] [] __sk_free+0x21/0x14b [ 233.048393] [] sk_free+0x26/0x2a [ 233.048393] [] sctp_close+0x215/0x224 [ 233.048393] [] ? lock_release+0x16f/0x1b9 [ 233.048393] [] inet_release+0x7e/0x85 [ 233.048393] [] sock_release+0x1f/0x77 [ 233.048393] [] sock_close+0x27/0x2b [ 233.048393] [] __fput+0x101/0x20a [ 233.048393] [] ____fput+0xe/0x10 [ 233.048393] [] task_work_run+0x5d/0x75 [ 233.048393] [] do_exit+0x290/0x7f5 [ 233.048393] [] ? retint_swapgs+0x13/0x1b [ 233.048393] [] do_group_exit+0x7b/0xba [ 233.048393] [] sys_exit_group+0x17/0x17 [ 233.048393] [] tracesys+0xdd/0xe2 [ 233.048393] Code: 59 01 5d c3 55 48 89 e5 53 41 50 0f 1f 44 00 00 48 89 fb e8 d4 b0 f0 ff 84 c0 75 11 48 89 de 48 c7 c7 fc fa f7 82 e8 0d 0f 57 01 <0f> 0b 5f 5b 5d c3 55 48 89 e5 0f 1f 44 00 00 48 63 87 d8 00 00 [ 233.048393] RIP [] kfree_debugcheck+0x27/0x2d [ 233.048393] RSP Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Eric Dumazet Cc: "H.K. Jerry Chu" Acked-by: Neal Cardwell Acked-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 ++----- net/ipv4/inet_connection_sock.c | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4f70ef0b946d..845372b025f6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -149,11 +149,8 @@ void inet_sock_destruct(struct sock *sk) pr_err("Attempt to release alive inet socket %p\n", sk); return; } - if (sk->sk_type == SOCK_STREAM) { - struct fastopen_queue *fastopenq = - inet_csk(sk)->icsk_accept_queue.fastopenq; - kfree(fastopenq); - } + if (sk->sk_protocol == IPPROTO_TCP) + kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8464b79c493f..f0c5b9c1a957 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -314,7 +314,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) newsk = req->sk; sk_acceptq_removed(sk); - if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) { + if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { spin_lock_bh(&queue->fastopenq->lock); if (tcp_rsk(req)->listener) { /* We are still waiting for the final ACK from 3WHS @@ -775,7 +775,7 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); - if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) { + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { BUG_ON(tcp_sk(child)->fastopen_rsk != req); BUG_ON(sk != tcp_rsk(req)->listener); -- cgit v1.2.1 From 316b6b5df77db801d62ec381cfae0c38ff84252c Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Thu, 6 Sep 2012 18:09:16 +0200 Subject: net/mac80211/scan.c: removes unnecessary semicolon removes unnecessary semicolon Found by Coccinelle: http://coccinelle.lip6.fr/ Signed-off-by: Peter Senna Tschudin Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index e9b19294dd45..c4cdbde24fd3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -407,7 +407,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, enum ieee80211_band band = local->hw.conf.channel->band; sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx));; + lockdep_is_held(&local->mtx)); for (i = 0; i < local->scan_req->n_ssids; i++) ieee80211_send_probe_req( -- cgit v1.2.1 From 6ae16775d6bcd57e64100fda78fd01c8e7e7f08d Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 7 Sep 2012 13:28:52 +0200 Subject: mac80211: move ieee80211_send_deauth_disassoc outside mlme code Move ieee80211_send_deauth_disassoc() to util.c to make it available for the rest of the mac80211 code. Signed-off-by: Antonio Quartulli [reword commit message] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 5 ++++ net/mac80211/mlme.c | 60 ++++++++-------------------------------------- net/mac80211/util.c | 39 ++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b95fa256d438..887452327ba8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -68,6 +68,8 @@ struct ieee80211_local; #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL +#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -1458,6 +1460,9 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u8 *extra, size_t extra_len, const u8 *bssid, const u8 *da, const u8 *key, u8 key_len, u8 key_idx); +void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, u16 stype, u16 reason, + bool send_frame, u8 *frame_buf); int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87466942fa82..0ca34137a3b5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -88,8 +88,6 @@ MODULE_PARM_DESC(probe_wait_ms, #define TMR_RUNNING_TIMER 0 #define TMR_RUNNING_CHANSW 1 -#define DEAUTH_DISASSOC_LEN (24 /* hdr */ + 2 /* reason */) - /* * All cfg80211 functions have to be called outside a locked * section so that they can acquire a lock themselves... This @@ -574,46 +572,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ieee80211_tx_skb(sdata, skb); } -static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, - u16 reason, bool send_frame, - u8 *frame_buf) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt = (void *)frame_buf; - - /* build frame */ - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); - mgmt->duration = 0; /* initialize only */ - mgmt->seq_ctrl = 0; /* initialize only */ - memcpy(mgmt->da, bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, bssid, ETH_ALEN); - /* u.deauth.reason_code == u.disassoc.reason_code */ - mgmt->u.deauth.reason_code = cpu_to_le16(reason); - - if (send_frame) { - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - DEAUTH_DISASSOC_LEN); - if (!skb) - return; - - skb_reserve(skb, local->hw.extra_tx_headroom); - - /* copy in frame */ - memcpy(skb_put(skb, DEAUTH_DISASSOC_LEN), - mgmt, DEAUTH_DISASSOC_LEN); - - if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_INTFL_DONT_ENCRYPT; - - ieee80211_tx_skb(sdata, skb); - } -} - void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -1695,7 +1653,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); if (!ifmgd->associated) { @@ -1713,7 +1671,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, * must be outside lock due to cfg80211, * but that's not a problem. */ - cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&local->mtx); ieee80211_recalc_idle(local); @@ -2645,7 +2603,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, false, frame_buf); @@ -2655,7 +2613,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, * must be outside lock due to cfg80211, * but that's not a problem. */ - cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&local->mtx); ieee80211_recalc_idle(local); @@ -3538,7 +3496,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct cfg80211_deauth_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -3566,7 +3524,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); - __cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + __cfg80211_send_deauth(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); @@ -3580,7 +3539,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -3605,7 +3564,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, frame_buf); mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + __cfg80211_send_disassoc(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ed7543960b16..2017904c69cc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1007,6 +1007,45 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, u16 stype, u16 reason, + bool send_frame, u8 *frame_buf) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt = (void *)frame_buf; + + /* build frame */ + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); + mgmt->duration = 0; /* initialize only */ + mgmt->seq_ctrl = 0; /* initialize only */ + memcpy(mgmt->da, bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + /* u.deauth.reason_code == u.disassoc.reason_code */ + mgmt->u.deauth.reason_code = cpu_to_le16(reason); + + if (send_frame) { + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + IEEE80211_DEAUTH_FRAME_LEN); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + /* copy in frame */ + memcpy(skb_put(skb, IEEE80211_DEAUTH_FRAME_LEN), + mgmt, IEEE80211_DEAUTH_FRAME_LEN); + + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_INTFL_DONT_ENCRYPT; + + ieee80211_tx_skb(sdata, skb); + } +} + int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, -- cgit v1.2.1 From 2cc59e784b54fb95accbd5f5a9d12041eec72dbc Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 7 Sep 2012 13:28:53 +0200 Subject: mac80211: reply to AUTH with DEAUTH if sta allocation fails in IBSS Whenever a host gets an AUTH frame it first allocates a new station and then replies with another AUTH frame. However, if sta allocations fails the host should send a DEAUTH frame instead to tell the other end that something went wrong. Signed-off-by: Antonio Quartulli [reword commit message a bit] Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a9d93285dba7..7c082517f0c7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -332,11 +332,27 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return ieee80211_ibss_finish_sta(sta, auth); } +static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u16 reason = le16_to_cpu(mgmt->u.deauth.reason_code); + + if (len < IEEE80211_DEAUTH_FRAME_LEN) + return; + + ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, reason); + sta_info_destroy_addr(sdata, mgmt->sa); +} + static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { u16 auth_alg, auth_transaction; + struct sta_info *sta; + u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; lockdep_assert_held(&sdata->u.ibss.mtx); @@ -352,9 +368,21 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); + sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); + /* + * if we have any problem in allocating the new station, we reply with a + * DEAUTH frame to tell the other end that we had a problem + */ + if (!sta) { + ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, true, + deauth_frame_buf); + return; + } + /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. @@ -902,6 +930,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); break; + case IEEE80211_STYPE_DEAUTH: + ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len); + break; } mgmt_out: -- cgit v1.2.1 From f39c1bfb5a03e2d255451bff05be0d7255298fa4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Sep 2012 11:08:50 -0400 Subject: SUNRPC: Fix a UDP transport regression Commit 43cedbf0e8dfb9c5610eb7985d5f21263e313802 (SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot) is causing hangs in the case of NFS over UDP mounts. Since neither the UDP or the RDMA transport mechanism use dynamic slot allocation, we can skip grabbing the socket lock for those transports. Add a new rpc_xprt_op to allow switching between the TCP and UDP/RDMA case. Note that the NFSv4.1 back channel assigns the slot directly through rpc_run_bc_task, so we can ignore that case. Reported-by: Dick Streefland Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.1] --- net/sunrpc/xprt.c | 34 ++++++++++++++++++++-------------- net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtsock.c | 3 +++ 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a5a402a7d21f..5d7f61d7559c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -969,11 +969,11 @@ static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) return false; } -static void xprt_alloc_slot(struct rpc_task *task) +void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; + spin_lock(&xprt->reserve_lock); if (!list_empty(&xprt->free)) { req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); list_del(&req->rq_list); @@ -994,12 +994,29 @@ static void xprt_alloc_slot(struct rpc_task *task) default: task->tk_status = -EAGAIN; } + spin_unlock(&xprt->reserve_lock); return; out_init_req: task->tk_status = 0; task->tk_rqstp = req; xprt_request_init(task, xprt); + spin_unlock(&xprt->reserve_lock); +} +EXPORT_SYMBOL_GPL(xprt_alloc_slot); + +void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + /* Note: grabbing the xprt_lock_write() ensures that we throttle + * new slot allocation if the transport is congested (i.e. when + * reconnecting a stream transport or when out of socket write + * buffer space). + */ + if (xprt_lock_write(xprt, task)) { + xprt_alloc_slot(xprt, task); + xprt_release_write(xprt, task); + } } +EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { @@ -1083,20 +1100,9 @@ void xprt_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - /* Note: grabbing the xprt_lock_write() here is not strictly needed, - * but ensures that we throttle new slot allocation if the transport - * is congested (e.g. if reconnecting or if we're out of socket - * write buffer space). - */ task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (!xprt_lock_write(xprt, task)) - return; - - spin_lock(&xprt->reserve_lock); - xprt_alloc_slot(task); - spin_unlock(&xprt->reserve_lock); - xprt_release_write(xprt, task); + xprt->ops->alloc_slot(xprt, task); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 06cdbff79e4a..5d9202dc7cb1 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -713,6 +713,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xprt_rdma_procs = { .reserve_xprt = xprt_rdma_reserve_xprt, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .alloc_slot = xprt_alloc_slot, .release_request = xprt_release_rqst_cong, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 400567243f84..a35b8e52e551 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2473,6 +2473,7 @@ static void bc_destroy(struct rpc_xprt *xprt) static struct rpc_xprt_ops xs_local_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, .connect = xs_connect, @@ -2489,6 +2490,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .alloc_slot = xprt_alloc_slot, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, @@ -2506,6 +2508,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .alloc_slot = xprt_lock_and_alloc_slot, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, -- cgit v1.2.1 From 979402b16cde048ced4839e21cc49e0779352b80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Sep 2012 23:34:44 +0000 Subject: udp: increment UDP_MIB_INERRORS if copy failed In UDP recvmsg(), we miss an increase of UDP_MIB_INERRORS if the copy of skb to userspace failed for whatever reason. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 +++++ net/ipv6/udp.c | 11 +++++++++++ 2 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6f6d1aca3c3d..2814f66dac64 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1226,6 +1226,11 @@ try_again: if (unlikely(err)) { trace_kfree_skb(skb, udp_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } goto out_free; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..07e2bfef6845 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -394,6 +394,17 @@ try_again: } if (unlikely(err)) { trace_kfree_skb(skb, udpv6_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + } goto out_free; } if (!peeked) { -- cgit v1.2.1 From b4949ab269a20e9af9a0c40729bac56e8f8a43a2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 6 Sep 2012 05:53:35 +0000 Subject: ipv6: fix handling of throw routes It's the same problem that previous fix about blackhole and prohibit routes. When adding a throw route, it was handled like a classic route. Moreover, it was only possible to add this kind of routes by specifying an interface. Before the patch: $ ip route add throw 2001::2/128 RTNETLINK answers: No such device $ ip route add throw 2001::2/128 dev eth0 $ ip -6 route | grep 2001::2 2001::2 dev eth0 metric 1024 After: $ ip route add throw 2001::2/128 $ ip -6 route | grep 2001::2 throw 2001::2 dev lo metric 1024 error -11 Reported-by: Markus Stenberg Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fa264447a751..339d921cf3b6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1471,6 +1471,9 @@ int ip6_route_add(struct fib6_config *cfg) case RTN_PROHIBIT: rt->dst.error = -EACCES; break; + case RTN_THROW: + rt->dst.error = -EAGAIN; + break; default: rt->dst.error = -ENETUNREACH; break; @@ -2275,7 +2278,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_UNREACHABLE || rtm->rtm_type == RTN_BLACKHOLE || - rtm->rtm_type == RTN_PROHIBIT) + rtm->rtm_type == RTN_PROHIBIT || + rtm->rtm_type == RTN_THROW) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) @@ -2412,6 +2416,9 @@ static int rt6_fill_node(struct net *net, case -EACCES: rtm->rtm_type = RTN_PROHIBIT; break; + case -EAGAIN: + rtm->rtm_type = RTN_THROW; + break; default: rtm->rtm_type = RTN_UNREACHABLE; break; -- cgit v1.2.1 From d679c5324d9a87c6295f56c2dea52d5f68834f41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Sep 2012 20:37:06 +0000 Subject: igmp: avoid drop_monitor false positives igmp should call consume_skb() for all correctly processed packets, to avoid false dropwatch/drop_monitor false positives. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0b5580c69f2d..3479b98a00a7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -815,14 +815,15 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, __be32 group) +/* return true if packet was dropped */ +static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) - return; + return false; rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { @@ -832,9 +833,11 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) } } rcu_read_unlock(); + return false; } -static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, +/* return true if packet was dropped */ +static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int len) { struct igmphdr *ih = igmp_hdr(skb); @@ -866,7 +869,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, /* clear deleted report items */ igmpv3_clear_delrec(in_dev); } else if (len < 12) { - return; /* ignore bogus packet; freed by caller */ + return true; /* ignore bogus packet; freed by caller */ } else if (IGMP_V1_SEEN(in_dev)) { /* This is a v3 query with v1 queriers present */ max_delay = IGMP_Query_Response_Interval; @@ -883,13 +886,13 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return; + return true; ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) + ntohs(ih3->nsrcs)*sizeof(__be32))) - return; + return true; ih3 = igmpv3_query_hdr(skb); } @@ -901,9 +904,9 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qrv = ih3->qrv; if (!group) { /* general query */ if (ih3->nsrcs) - return; /* no sources allowed */ + return false; /* no sources allowed */ igmp_gq_start_timer(in_dev); - return; + return false; } /* mark sources to include, if group & source-specific */ mark = ih3->nsrcs != 0; @@ -939,6 +942,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, igmp_mod_timer(im, max_delay); } rcu_read_unlock(); + return false; } /* called in rcu_read_lock() section */ @@ -948,6 +952,7 @@ int igmp_rcv(struct sk_buff *skb) struct igmphdr *ih; struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; + bool dropped = true; if (in_dev == NULL) goto drop; @@ -969,7 +974,7 @@ int igmp_rcv(struct sk_buff *skb) ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_QUERY: - igmp_heard_query(in_dev, skb, len); + dropped = igmp_heard_query(in_dev, skb, len); break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: @@ -979,7 +984,7 @@ int igmp_rcv(struct sk_buff *skb) /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST) - igmp_heard_report(in_dev, ih->group); + dropped = igmp_heard_report(in_dev, ih->group); break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 @@ -997,7 +1002,10 @@ int igmp_rcv(struct sk_buff *skb) } drop: - kfree_skb(skb); + if (dropped) + kfree_skb(skb); + else + consume_skb(skb); return 0; } -- cgit v1.2.1 From dbe9a4173ea53b72b2c35d19f676a85b69f1c9fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 6 Sep 2012 18:20:01 +0000 Subject: scm: Don't use struct ucred in NETLINK_CB and struct scm_cookie. Passing uids and gids on NETLINK_CB from a process in one user namespace to a process in another user namespace can result in the wrong uid or gid being presented to userspace. Avoid that problem by passing kuids and kgids instead. - define struct scm_creds for use in scm_cookie and netlink_skb_parms that holds uid and gid information in kuid_t and kgid_t. - Modify scm_set_cred to fill out scm_creds by heand instead of using cred_to_ucred to fill out struct ucred. This conversion ensures userspace does not get incorrect uid or gid values to look at. - Modify scm_recv to convert from struct scm_creds to struct ucred before copying credential values to userspace. - Modify __scm_send to populate struct scm_creds on in the scm_cookie, instead of just copying struct ucred from userspace. - Modify netlink_sendmsg to copy scm_creds instead of struct ucred into the NETLINK_CB. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/scm.c | 17 +++++++++++------ net/netlink/af_netlink.c | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 6ab491d6c26f..9c1c63da3ca8 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -155,19 +155,21 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) break; case SCM_CREDENTIALS: { + struct ucred creds; kuid_t uid; kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; - memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); - err = scm_check_creds(&p->creds); + memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred)); + err = scm_check_creds(&creds); if (err) goto error; - if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { + p->creds.pid = creds.pid; + if (!p->pid || pid_vnr(p->pid) != creds.pid) { struct pid *pid; err = -ESRCH; - pid = find_get_pid(p->creds.pid); + pid = find_get_pid(creds.pid); if (!pid) goto error; put_pid(p->pid); @@ -175,11 +177,14 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) } err = -EINVAL; - uid = make_kuid(current_user_ns(), p->creds.uid); - gid = make_kgid(current_user_ns(), p->creds.gid); + uid = make_kuid(current_user_ns(), creds.uid); + gid = make_kgid(current_user_ns(), creds.gid); if (!uid_valid(uid) || !gid_valid(gid)) goto error; + p->creds.uid = uid; + p->creds.gid = gid; + if (!p->cred || !uid_eq(p->cred->euid, uid) || !gid_eq(p->cred->egid, gid)) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 382119917166..f530b1ca1773 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_group = dst_group; - memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + NETLINK_CB(skb).creds = siocb->scm->creds; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { -- cgit v1.2.1 From 4ccfe6d4109252dfadcd6885f33ed600ee03dbf8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 7 Sep 2012 00:45:29 +0000 Subject: ipv4/route: arg delay is useless in rt_cache_flush() Since route cache deletion (89aef8921bfbac22f), delay is no more used. Remove it. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_frontend.c | 20 ++++++++++---------- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_trie.c | 6 +++--- net/ipv4/route.c | 19 +++---------------- 6 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 77e87aff419a..47800459e4cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index adf273f8ad2e..f14eff506743 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1500,7 +1500,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1534,7 +1534,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1551,7 +1551,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index acdee325d972..8a7cd795a96e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1061,14 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(net, -1); + rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(net, 0); + rt_cache_flush(net); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e498d2..274309d3aded 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3c820dae235e..8d766106b540 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1711,7 +1711,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dc9549b5eb1c..ab1e7c7c38fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net) atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) +void rt_cache_flush(struct net *net) { rt_cache_invalidate(net); } @@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } -- cgit v1.2.1 From ba8bd0ea982427258ab86144ac5fd1a9d7763a90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Sep 2012 22:27:11 +0200 Subject: net: rt_cache_flush() cleanup We dont use jhash anymore since route cache removal, so we can get rid of get_random_bytes() calls for rt_genid changes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ab1e7c7c38fc..d39edf16d607 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -447,23 +447,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) -{ - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - void rt_cache_flush(struct net *net) { - rt_cache_invalidate(net); + atomic_inc(&net->ipv4.rt_genid); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2520,8 +2506,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->ipv4.rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.2.1 From 6b536b5e5e1da32f3ba1e3f42c7bf2f80d37dc6b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 31 Aug 2012 16:39:28 +0300 Subject: Bluetooth: Remove unneeded zero init hdev is allocated with kzalloc so zero initialization is not needed. Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fa974a19d365..86abe721f484 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1652,6 +1652,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); INIT_LIST_HEAD(&hdev->remote_oob_data); + INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); @@ -1674,7 +1675,6 @@ struct hci_dev *hci_alloc_dev(void) hci_init_sysfs(hdev); discovery_init(hdev); - hci_conn_hash_init(hdev); return hdev; } -- cgit v1.2.1 From 9472007c62ecc8f21daa2e1e252bf73b67e535fc Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:43 +0300 Subject: Bluetooth: trivial: Make hci_chan_del return void Return code is not needed in hci_chan_del Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3e65c021df50..59f0344406c8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -935,7 +935,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn) return chan; } -int hci_chan_del(struct hci_chan *chan) +void hci_chan_del(struct hci_chan *chan) { struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; @@ -948,8 +948,6 @@ int hci_chan_del(struct hci_chan *chan) skb_queue_purge(&chan->data_q); kfree(chan); - - return 0; } void hci_chan_list_flush(struct hci_conn *conn) -- cgit v1.2.1 From 93f71941c6d3ead73ca74d447b4007c6908f6eb5 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:44 +0300 Subject: Bluetooth: trivial: Remove empty line Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 86abe721f484..9e6574a8e6e2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -268,7 +268,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) BT_ERR("Unknown device type %d", hdev->dev_type); break; } - } static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt) -- cgit v1.2.1 From e71dfabab03129182a955663cbd53406714d96c0 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:46 +0300 Subject: Bluetooth: AMP: Add Read Data Block Size to amp_init Add Read Data Block Size HCI cmd to AMP initialization, then it makes possible to send data. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9e6574a8e6e2..e4070517ff3b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -231,6 +231,9 @@ static void amp_init(struct hci_dev *hdev) /* Read Local AMP Info */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + + /* Read Data Blk size */ + hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } static void hci_init_req(struct hci_dev *hdev, unsigned long opt) -- cgit v1.2.1 From 6862234238e84648c305526af2edd98badcad1e0 Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Fri, 7 Sep 2012 13:40:50 +0000 Subject: net: small bug on rxhash calculation In the current rxhash calculation function, while the sorting of the ports/addrs is coherent (you get the same rxhash for packets sharing the same 4-tuple, in both directions), ports and addrs are sorted independently. This implies packets from a connection between the same addresses but crossed ports hash to the same rxhash. For example, traffic between A=S:l and B=L:s is hashed (in both directions) from {L, S, {s, l}}. The same rxhash is obtained for packets between C=S:s and D=L:l. This patch ensures that you either swap both addrs and ports, or you swap none. Traffic between A and B, and traffic between C and D, get their rxhash from different sources ({L, S, {l, s}} for A<->B, and {L, S, {s, l}} for C<->D) The patch is co-written with Eric Dumazet Signed-off-by: Chema Gonzalez Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 83988362805e..d7fe32c946c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2647,15 +2647,16 @@ void __skb_get_rxhash(struct sk_buff *skb) if (!skb_flow_dissect(skb, &keys)) return; - if (keys.ports) { - if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]) - swap(keys.port16[0], keys.port16[1]); + if (keys.ports) skb->l4_rxhash = 1; - } /* get a consistent hash (same value on both flow directions) */ - if ((__force u32)keys.dst < (__force u32)keys.src) + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } hash = jhash_3words((__force u32)keys.dst, (__force u32)keys.src, -- cgit v1.2.1 From 9785e10aedfa0fad5c1aac709dce5ada1b123783 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 8 Sep 2012 02:53:53 +0000 Subject: netlink: kill netlink_set_nonroot Replace netlink_set_nonroot by one new field `flags' in struct netlink_kernel_cfg that is passed to netlink_kernel_create. This patch also renames NL_NONROOT_* to NL_CFG_F_NONROOT_* since now the flags field in nl_table is generic (so we can add more flags if needed in the future). Also adjust all callers in the net-next tree to use these flags instead of netlink_set_nonroot. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- net/netlink/af_netlink.c | 28 +++++++++++++--------------- net/netlink/genetlink.c | 3 +-- 3 files changed, 15 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c64efcff8078..a71806eb9cc6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2381,6 +2381,7 @@ static int __net_init rtnetlink_net_init(struct net *net) .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, .cb_mutex = &rtnl_mutex, + .flags = NL_CFG_F_NONROOT_RECV, }; sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg); @@ -2416,7 +2417,6 @@ void __init rtnetlink_init(void) if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); - netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f530b1ca1773..b74540ce3c14 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -121,7 +121,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; - unsigned int nl_nonroot; + unsigned int flags; unsigned int groups; struct mutex *cb_mutex; struct module *module; @@ -536,6 +536,8 @@ static int netlink_release(struct socket *sock) if (--nl_table[sk->sk_protocol].registered == 0) { kfree(nl_table[sk->sk_protocol].listeners); nl_table[sk->sk_protocol].module = NULL; + nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } } else if (nlk->subscriptions) { @@ -596,7 +598,7 @@ retry: static inline int netlink_capable(const struct socket *sock, unsigned int flag) { - return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || + return (nl_table[sock->sk->sk_protocol].flags & flag) || capable(CAP_NET_ADMIN); } @@ -659,7 +661,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, /* Only superuser is allowed to listen multicasts */ if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_NONROOT_RECV)) + if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -721,7 +723,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->pid) @@ -1244,7 +1246,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_NONROOT_RECV)) + if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -1376,7 +1378,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_pid) && - !netlink_capable(sock, NL_NONROOT_SEND)) + !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -1580,7 +1582,10 @@ netlink_kernel_create(struct net *net, int unit, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; - nl_table[unit].bind = cfg ? cfg->bind : NULL; + if (cfg) { + nl_table[unit].bind = cfg->bind; + nl_table[unit].flags = cfg->flags; + } nl_table[unit].registered = 1; } else { kfree(listeners); @@ -1679,13 +1684,6 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_table_ungrab(); } -void netlink_set_nonroot(int protocol, unsigned int flags) -{ - if ((unsigned int)protocol < MAX_LINKS) - nl_table[protocol].nl_nonroot = flags; -} -EXPORT_SYMBOL(netlink_set_nonroot); - struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) { @@ -2150,7 +2148,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; - nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; + nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; netlink_table_ungrab(); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fda497412fc3..c1b71aef9f71 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -918,6 +918,7 @@ static int __net_init genl_pernet_init(struct net *net) struct netlink_kernel_cfg cfg = { .input = genl_rcv, .cb_mutex = &genl_mutex, + .flags = NL_CFG_F_NONROOT_RECV, }; /* we'll bump the group number right afterwards */ @@ -955,8 +956,6 @@ static int __init genl_init(void) if (err < 0) goto problem; - netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - err = register_pernet_subsys(&genl_pernet_ops); if (err) goto problem; -- cgit v1.2.1 From 9f00d9776bc5beb92e8bfc884a7e96ddc5589e2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 8 Sep 2012 02:53:54 +0000 Subject: netlink: hide struct module parameter in netlink_kernel_create This patch defines netlink_kernel_create as a wrapper function of __netlink_kernel_create to hide the struct module *me parameter (which seems to be THIS_MODULE in all existing netlink subsystems). Suggested by David S. Miller. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_ulog.c | 3 +-- net/core/rtnetlink.c | 2 +- net/core/sock_diag.c | 3 +-- net/decnet/netfilter/dn_rtmsg.c | 3 +-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 3 +-- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 8 +++----- net/netlink/genetlink.c | 3 +-- net/xfrm/xfrm_user.c | 2 +- 10 files changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 19063473c71f..3476ec469740 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -298,8 +298,7 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!ebtulognl) ret = -ENOMEM; else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a71806eb9cc6..508c5df4a09c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2384,7 +2384,7 @@ static int __net_init rtnetlink_net_init(struct net *net) .flags = NL_CFG_F_NONROOT_RECV, }; - sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); if (!sk) return -ENOMEM; net->rtnl = sk; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 9d8755e4a7a5..602cd637182e 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net) .input = sock_diag_rcv, }; - net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, - THIS_MODULE, &cfg); + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); return net->diag_nlsk == NULL ? -ENOMEM : 0; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 11db0ecf342f..dfe42012a044 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -130,8 +130,7 @@ static int __init dn_rtmsg_init(void) .input = dnrmg_receive_user_skb, }; - dnrmg = netlink_kernel_create(&init_net, - NETLINK_DNRTMSG, THIS_MODULE, &cfg); + dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8a7cd795a96e..dc1f10ed1872 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) .input = nl_fib_input, }; - sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); if (sk == NULL) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1109f7f6c254..b5ef3cba2250 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -396,8 +396,7 @@ static int __init ulog_tg_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!nflognl) return -ENOMEM; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a26503342e71..ffb92c03a358 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -241,7 +241,7 @@ static int __net_init nfnetlink_net_init(struct net *net) #endif }; - nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, THIS_MODULE, &cfg); + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b74540ce3c14..4d348e97e131 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1526,9 +1526,8 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(struct net *net, int unit, - struct module *module, - struct netlink_kernel_cfg *cfg) +__netlink_kernel_create(struct net *net, int unit, struct module *module, + struct netlink_kernel_cfg *cfg) { struct socket *sock; struct sock *sk; @@ -1603,8 +1602,7 @@ out_sock_release_nosk: sock_release(sock); return NULL; } -EXPORT_SYMBOL(netlink_kernel_create); - +EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c1b71aef9f71..19288b7d6135 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -922,8 +922,7 @@ static int __net_init genl_pernet_init(struct net *net) }; /* we'll bump the group number right afterwards */ - net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, - THIS_MODULE, &cfg); + net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg); if (!net->genl_sock && net_eq(net, &init_net)) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ab58034c42d6..354070adb5ef 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2963,7 +2963,7 @@ static int __net_init xfrm_user_net_init(struct net *net) .input = xfrm_netlink_rcv, }; - nlsk = netlink_kernel_create(net, NETLINK_XFRM, THIS_MODULE, &cfg); + nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ -- cgit v1.2.1 From 5693d68df6883f039d24a4ce8b23ac48f94a73e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 5 Sep 2012 10:10:28 +0000 Subject: netfilter: nf_nat: fix out-of-bounds access in address selection include/linux/jhash.h:138:16: warning: array subscript is above array bounds [jhash2() expects the number of u32 in the key] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 29d445235199..1816ad381485 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -255,7 +255,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, * client coming from the same IP (some Internet Banking sites * like this), even across reboots. */ - j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3), + j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32), range->flags & NF_NAT_RANGE_PERSISTENT ? 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); -- cgit v1.2.1 From a67299556ea1aa56daaeb985fd32295dacecba1a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Aug 2012 03:14:15 +0000 Subject: netfilter: nfnetlink_queue: remove pointless conditional before kfree_skb() Remove pointless conditional before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..5c2d78d3a4d8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -406,8 +406,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return skb; nla_put_failure: - if (skb) - kfree_skb(skb); + kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; } -- cgit v1.2.1 From 64f509ce71b08d037998e93dd51180c19b2f464c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 31 Aug 2012 09:55:53 +0000 Subject: netfilter: Mark SYN/ACK packets as invalid from original direction Clients should not send such packets. By accepting them, we open up a hole by wich ephemeral ports can be discovered in an off-path attack. See: "Reflection scan: an Off-Path Attack on TCP" by Jan Wrobel, http://arxiv.org/abs/1201.2074 Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a5ac11ebef33..aba98f942979 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sSS */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, /* * sNO -> sIV Too late and no reason to do anything * sSS -> sIV Client can't send SYN and then SYN/ACK * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open - * sSR -> sIG - * sES -> sIG Error: SYNs in window outside the SYN_SENT state - * are errors. Receiver will reply with RST - * and close the connection. - * Or we are not in sync and hold a dead connection. - * sFW -> sIG - * sCW -> sIG - * sLA -> sIG - * sTW -> sIG - * sCL -> sIG + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open + * sES -> sIV Invalid SYN/ACK packets sent by the client + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV + * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, -- cgit v1.2.1 From 4a70bbfaef0361d27272629d1a250a937edcafe4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 31 Aug 2012 09:55:54 +0000 Subject: netfilter: Validate the sequence number of dataless ACK packets as well We spare nothing by not validating the sequence number of dataless ACK packets and enabling it makes harder off-path attacks. See: "Reflection scan: an Off-Path Attack on TCP" by Jan Wrobel, http://arxiv.org/abs/1201.2074 Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index aba98f942979..e046b3756aab 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -630,15 +630,9 @@ static bool tcp_in_window(const struct nf_conn *ct, ack = sack = receiver->td_end; } - if (seq == end - && (!tcph->rst - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) /* - * Packets contains no data: we assume it is valid - * and check the ack value only. - * However RST segments are always validated by their - * SEQ number, except when seq == 0 (reset sent answering - * SYN. + * RST sent answering SYN. */ seq = end = sender->td_end; -- cgit v1.2.1 From 0edd94887d19ad73539477395c17ea0d6898947a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 5 Sep 2012 18:21:53 +0000 Subject: ipvs: use list_del_init instead of list_del/INIT_LIST_HEAD Using list_del_init() instead of list_del() + INIT_LIST_HEAD(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 767cc12da0fe..37b38d0791cd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -539,8 +539,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { - list_del(&dest->d_list); - INIT_LIST_HEAD(&dest->d_list); + list_del_init(&dest->d_list); } return 1; -- cgit v1.2.1 From e548c49e6dc6b08b59042930a2e90c69c13c9293 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Sep 2012 17:08:23 +0200 Subject: mac80211: add key flag for management keys Mark keys that might be used to receive management frames so drivers can fall back on software crypto for them if they don't support hardware offload. As the new flag is only set correctly for RX keys and the existing IEEE80211_KEY_FLAG_SW_MGMT flag can only affect TX, also rename the latter to IEEE80211_KEY_FLAG_SW_MGMT_TX. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 32 ++++++++++++++++++++++++++++++++ net/mac80211/tx.c | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03fe6d1cff42..00e31b488adc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -170,6 +170,38 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } } + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* Keys without a station are used for TX only */ + if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; + case NL80211_IFTYPE_ADHOC: + /* no MFP (yet) */ + break; + case NL80211_IFTYPE_MESH_POINT: +#ifdef CONFIG_MAC80211_MESH + if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; +#endif + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + /* shouldn't happen */ + WARN_ON_ONCE(1); + break; + } + err = ieee80211_key_link(key, sdata, sta); if (err) ieee80211_key_free(sdata->local, key); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 29eb4e678235..e0e0d1d0e830 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -580,7 +580,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; else skip_hw = (tx->key->conf.flags & - IEEE80211_KEY_FLAG_SW_MGMT) && + IEEE80211_KEY_FLAG_SW_MGMT_TX) && ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: -- cgit v1.2.1 From b22cfcfcae5b2c1e9b43543b6a23e5ef517de8f8 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 9 Sep 2012 14:43:51 +0300 Subject: mac80211: use call_rcu() on sta deletion mac80211 calls synchronize_rcu() on sta deletion, which increase the roaming time significantly. Convert it into a call_rcu() mechanism, in order to avoid blocking. Since some of the cleanup functions might sleep, schedule from the call_rcu callback a new work that will do the actual cleanup. In order to make sure the cleanup occurs before the interface went down, flush local->workqueue on ieee80211_do_stop(). Signed-off-by: Yoni Divinsky Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 15 ++++-- net/mac80211/sta_info.c | 121 +++++++++++++++++++++++++++--------------------- net/mac80211/sta_info.h | 2 + 3 files changed, 81 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d747da541747..6f8a73c64fb3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -793,11 +793,20 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, flush_work(&sdata->work); /* * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path + * Call rcu_barrier() to wait both for the RX path * should it be using the interface and enqueuing - * frames at this very time on another CPU. + * frames at this very time on another CPU, and + * for the sta free call_rcu callbacks. */ - synchronize_rcu(); + rcu_barrier(); + + /* + * free_sta_rcu() enqueues a work for the actual + * sta cleanup, so we need to flush it while + * sdata is still valid. + */ + flush_workqueue(local->workqueue); + skb_queue_purge(&sdata->skb_queue); /* diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 06fa75ceb025..9c8cd8b8f753 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,6 +91,70 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } +static void free_sta_work(struct work_struct *wk) +{ + struct sta_info *sta = container_of(wk, struct sta_info, free_sta_wk); + int ac, i; + struct tid_ampdu_tx *tid_tx; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + + /* + * At this point, when being called as call_rcu callback, + * neither mac80211 nor the driver can reference this + * sta struct any more except by still existing timers + * associated with this station that we clean up below. + */ + + if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + BUG_ON(!sdata->bss); + + clear_sta_flag(sta, WLAN_STA_PS_STA); + + atomic_dec(&sdata->bss->num_sta_ps); + sta_info_recalc_tim(sta); + } + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->tx_filtered[ac]); + } + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif)) { + mesh_accept_plinks_update(sdata); + mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } +#endif + + cancel_work_sync(&sta->drv_unblock_wk); + + /* + * Destroy aggregation state here. It would be nice to wait for the + * driver to finish aggregation stop and then clean up, but for now + * drivers have to handle aggregation stop being requested, followed + * directly by station destruction. + */ + for (i = 0; i < STA_TID_NUM; i++) { + tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); + if (!tid_tx) + continue; + __skb_queue_purge(&tid_tx->pending); + kfree(tid_tx); + } + + sta_info_free(local, sta); +} + +static void free_sta_rcu(struct rcu_head *h) +{ + struct sta_info *sta = container_of(h, struct sta_info, rcu_head); + + ieee80211_queue_work(&sta->local->hw, &sta->free_sta_wk); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) @@ -241,6 +305,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->free_sta_wk, free_sta_work); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -654,8 +719,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - int ret, i, ac; - struct tid_ampdu_tx *tid_tx; + int ret, i; might_sleep(); @@ -711,65 +775,14 @@ int __must_check __sta_info_destroy(struct sta_info *sta) WARN_ON_ONCE(ret != 0); } - /* - * At this point, after we wait for an RCU grace period, - * neither mac80211 nor the driver can reference this - * sta struct any more except by still existing timers - * associated with this station that we clean up below. - */ - synchronize_rcu(); - - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { - BUG_ON(!sdata->bss); - - clear_sta_flag(sta, WLAN_STA_PS_STA); - - atomic_dec(&sdata->bss->num_sta_ps); - sta_info_recalc_tim(sta); - } - - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); - __skb_queue_purge(&sta->ps_tx_buf[ac]); - __skb_queue_purge(&sta->tx_filtered[ac]); - } - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_accept_plinks_update(sdata); -#endif - sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cancel_work_sync(&sta->drv_unblock_wk); - cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { - mesh_plink_deactivate(sta); - del_timer_sync(&sta->plink_timer); - } -#endif - - /* - * Destroy aggregation state here. It would be nice to wait for the - * driver to finish aggregation stop and then clean up, but for now - * drivers have to handle aggregation stop being requested, followed - * directly by station destruction. - */ - for (i = 0; i < STA_TID_NUM; i++) { - tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); - if (!tid_tx) - continue; - __skb_queue_purge(&tid_tx->pending); - kfree(tid_tx); - } - - sta_info_free(local, sta); + call_rcu(&sta->rcu_head, free_sta_rcu); return 0; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a470e1123a55..c88f161f8118 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -287,6 +287,7 @@ struct sta_ampdu_mlme { struct sta_info { /* General information, mostly static */ struct list_head list; + struct rcu_head rcu_head; struct sta_info __rcu *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -297,6 +298,7 @@ struct sta_info { spinlock_t lock; struct work_struct drv_unblock_wk; + struct work_struct free_sta_wk; u16 listen_interval; -- cgit v1.2.1 From 1bad53824305807bb5cf49d6b588dd9d867586c6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 8 Sep 2012 11:58:30 +0200 Subject: mac80211: validate skb->dev in the tx status path skb->dev might contain a stale reference to a device that was already deleted, and using it unchecked can lead to invalid pointer accesses. Since this is only used for nl80211 tx, iterate over active interfaces to find a match for skb->dev, and discard the tx status if the device is gone. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/status.c | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b0801b7d572d..2ce89732d0f2 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -517,29 +517,41 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = (unsigned long)skb; + bool found = false; + acked = info->flags & IEEE80211_TX_STAT_ACK; - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { - cfg80211_probe_status(skb->dev, hdr->addr1, - cookie, acked, GFP_ATOMIC); - } else if (skb->dev) { - cfg80211_mgmt_tx_status( - skb->dev->ieee80211_ptr, cookie, skb->data, - skb->len, acked, GFP_ATOMIC); - } else { - struct ieee80211_sub_if_data *p2p_sdata; + rcu_read_lock(); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!sdata->dev) + continue; - rcu_read_lock(); + if (skb->dev != sdata->dev) + continue; - p2p_sdata = rcu_dereference(local->p2p_sdata); - if (p2p_sdata) { - cfg80211_mgmt_tx_status( - &p2p_sdata->wdev, cookie, skb->data, - skb->len, acked, GFP_ATOMIC); - } - rcu_read_unlock(); + found = true; + break; + } + + if (!skb->dev) { + sdata = rcu_dereference(local->p2p_sdata); + if (sdata) + found = true; + } + + if (!found) + skb->dev = NULL; + else if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { + cfg80211_probe_status(sdata->dev, hdr->addr1, + cookie, acked, GFP_ATOMIC); + } else { + cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data, + skb->len, acked, GFP_ATOMIC); } + + rcu_read_unlock(); } if (unlikely(info->ack_frame_id)) { -- cgit v1.2.1 From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Sep 2012 20:12:54 +0000 Subject: netlink: Rename pid to portid to avoid confusion It is a frequent mistake to confuse the netlink port identifier with a process identifier. Try to reduce this confusion by renaming fields that hold port identifiers portid instead of pid. I have carefully avoided changing the structures exported to userspace to avoid changing the userspace API. I have successfully built an allyesconfig kernel with this change. Signed-off-by: "Eric W. Biederman" Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 6 +- net/bridge/br_netlink.c | 2 +- net/can/gw.c | 2 +- net/core/fib_rules.c | 6 +- net/core/neighbour.c | 8 +- net/core/rtnetlink.c | 12 +-- net/dcb/dcbnl.c | 18 ++-- net/decnet/dn_dev.c | 6 +- net/decnet/dn_route.c | 10 +- net/decnet/dn_table.c | 12 +-- net/ieee802154/nl-mac.c | 6 +- net/ieee802154/nl-phy.c | 6 +- net/ipv4/devinet.c | 28 +++--- net/ipv4/fib_frontend.c | 10 +- net/ipv4/fib_semantics.c | 8 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/inet_diag.c | 32 +++---- net/ipv4/ipmr.c | 10 +- net/ipv4/route.c | 8 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/udp_diag.c | 6 +- net/ipv6/addrconf.c | 32 +++---- net/ipv6/addrlabel.c | 10 +- net/ipv6/ip6mr.c | 10 +- net/ipv6/route.c | 20 ++-- net/irda/irnetlink.c | 2 +- net/key/af_key.c | 32 +++---- net/l2tp/l2tp_netlink.c | 24 ++--- net/netfilter/ipset/ip_set_core.c | 24 ++--- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/nf_conntrack_ecache.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 78 ++++++++-------- net/netfilter/nfnetlink_acct.c | 12 +-- net/netfilter/nfnetlink_cthelper.c | 12 +-- net/netfilter/nfnetlink_cttimeout.c | 12 +-- net/netfilter/nfnetlink_log.c | 18 ++-- net/netfilter/nfnetlink_queue_core.c | 28 +++--- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 4 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/af_netlink.c | 172 +++++++++++++++++------------------ net/netlink/genetlink.c | 42 ++++----- net/nfc/netlink.c | 26 +++--- net/openvswitch/actions.c | 4 +- net/openvswitch/datapath.c | 84 ++++++++--------- net/openvswitch/datapath.h | 2 +- net/openvswitch/vport.c | 2 +- net/openvswitch/vport.h | 6 +- net/packet/diag.c | 6 +- net/phonet/pn_netlink.c | 14 +-- net/sched/act_api.c | 52 +++++------ net/sched/cls_api.c | 14 +-- net/sched/sch_api.c | 44 ++++----- net/tipc/netlink.c | 2 +- net/unix/diag.c | 14 +-- net/wireless/core.h | 2 +- net/wireless/mlme.c | 16 ++-- net/wireless/nl80211.c | 110 +++++++++++----------- net/xfrm/xfrm_state.c | 10 +- net/xfrm/xfrm_user.c | 62 ++++++------- 60 files changed, 607 insertions(+), 607 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9ce430b4657c..ddf93efc133c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, - u32 pid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb, goto skip; if (fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) < 0) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe41260fbf38..093f527276a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto skip; if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; diff --git a/net/can/gw.c b/net/can/gw.c index b54d5e695b03..127879c55fb6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid, + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..58a4ba27dfe3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (unresolved) ops->unresolved_rules++; - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); if (ops->delete) ops->delete(rule); fib_rule_put(rule); @@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops) < 0) break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..c160adb38e5a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; @@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) @@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { @@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, tbl) <= 0) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 508c5df4a09c..92575370d9f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask) <= 0) @@ -1899,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nskb == NULL) return -ENOBUFS; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); return err; } @@ -2180,9 +2180,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, { struct netdev_hw_addr *ha; int err; - u32 pid, seq; + u32 portid, seq; - pid = NETLINK_CB(cb->skb).pid; + portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { @@ -2190,7 +2190,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, - pid, seq, 0, NTF_SELF); + portid, seq, 0, NTF_SELF); if (err < 0) return err; skip: diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 81f2bb62dea3..70989e672304 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1319,7 +1319,7 @@ nla_put_failure: } static int dcbnl_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid, int dcbx_ver) + u32 seq, u32 portid, int dcbx_ver) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (!ops) return -EOPNOTSUPP; - skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); + skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh); if (!skb) return -ENOBUFS; @@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, } int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE); } EXPORT_SYMBOL(dcbnl_ieee_notify); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE); } EXPORT_SYMBOL(dcbnl_cee_notify); @@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; @@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; } - reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); if (!reply_skb) { ret = -ENOBUFS; @@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nlmsg_end(reply_skb, reply_nlh); - ret = rtnl_unicast(reply_skb, &init_net, pid); + ret = rtnl_unicast(reply_skb, &init_net, portid); out: dev_put(netdev); return ret; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index f3924ab1e019..7b7e561412d3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void) } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (dn_idx < skip_naddr) continue; - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c855e8d0738f..b57419cc41a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb) return dn_route_input_slow(skb); } -static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct dn_route *rt = (struct dn_route *)skb_dst(skb); @@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; long expires; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); + err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) goto out_free; @@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void goto out_free; } - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); out_free: kfree_skb(skb); @@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { skb_dst_drop(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 16c986ab1228..f968c1b58f47 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) return payload; } -static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; + u32 portid = req ? req->portid : 0; int err = -ENOBUFS; skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); if (err < 0) { @@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, continue; if (f->fn_state & DN_S_ZOMBIE) continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->n, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 1e9917124e75..96bb08abece2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -246,7 +246,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_start_confirm); -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; @@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; @@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb, if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) goto cont; - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d54be34cca94..22b1a7058fd3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -35,7 +35,7 @@ #include "ieee802154.h" -static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; @@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; @@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).pid, + NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f14eff506743..7b00556e184b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -311,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -345,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -382,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -395,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { @@ -417,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -464,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; @@ -563,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -649,7 +649,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } /* @@ -1246,12 +1246,12 @@ static size_t inet_nlmsg_size(void) } static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -1313,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) { rcu_read_unlock(); @@ -1335,7 +1335,7 @@ done: } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; @@ -1347,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, if (skb == NULL) goto errout; - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dc1f10ed1872..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = nlh->nlmsg_flags; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; @@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; - u32 pid; + u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); @@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - pid = NETLINK_CB(skb).pid; /* pid of sending process */ - NETLINK_CB(skb).pid = 0; /* from kernel */ + portid = NETLINK_CB(skb).portid; /* pid of sending process */ + NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); } static int __net_init nl_fib_lookup_init(struct net *net) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da80dc14cc76..3509065e409a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, if (skb == NULL) goto errout; - err = fib_dump_info(skb, info->pid, seq, event, tb_id, + err = fib_dump_info(skb, info->portid, seq, event, tb_id, fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { @@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: @@ -989,14 +989,14 @@ failure: return ERR_PTR(err); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8d766106b540..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1873,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bc005b1435f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *r, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, pid, seq, nlmsg_flags, + skb, r, portid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, struct user_namespace *user_ns, - u32 pid, u32 seq, + u32 portid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = inet_diag_fill_req(skb, sk, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8aa7a4cf9139..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } @@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { ip_mr_forward(net, mrt, skb, c, 0); } @@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc_cache *c) + u32 portid, u32 seq, struct mfc_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d39edf16d607..940f4f4cb201 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, - struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); @@ -2146,7 +2146,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2306,12 +2306,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void rt->rt_flags |= RTCF_NOTIFY; err = rt_fill_info(net, dst, src, &fl4, skb, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 988edb63ee73..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -803,7 +803,7 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tcp_metrics_nl_family, NLM_F_MULTI, TCP_METRICS_CMD_GET); if (!hdr) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d2f336ea82ca..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 572cb660837b..1237d5d037d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3534,12 +3534,12 @@ static inline int inet6_ifaddr_msgsize(void) } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3577,7 +3577,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3586,7 +3586,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3602,7 +3602,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3611,7 +3611,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3652,7 +3652,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); @@ -3668,7 +3668,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); @@ -3683,7 +3683,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); @@ -3805,7 +3805,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ @@ -3813,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: @@ -4015,14 +4015,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -4080,7 +4080,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (!idev) goto cont; if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) <= 0) goto out; @@ -4128,14 +4128,14 @@ static inline size_t inet6_prefix_nlmsg_size(void) } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, + struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index eb6a63632d3c..0b0171570d17 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -470,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, static int ip6addrlbl_fill(struct sk_buff *skb, struct ip6addrlbl_entry *p, u32 lseq, - u32 pid, u32 seq, int event, + u32 portid, u32 seq, int event, unsigned int flags) { - struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; @@ -503,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI)) <= 0) @@ -574,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, } err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); ip6addrlbl_put(p); @@ -585,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4532973f0dd4..08ea3f0b6e55 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } @@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 339d921cf3b6..a81c6790a648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,7 +1874,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; @@ -1924,7 +1924,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), }; @@ -2285,7 +2285,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2376,7 +2376,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, - int iif, int type, u32 pid, u32 seq, + int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; @@ -2392,7 +2392,7 @@ static int rt6_fill_node(struct net *net, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -2537,7 +2537,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, + NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2617,14 +2617,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).pid, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } @@ -2644,14 +2644,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 6c7c4b92e4f8..c32971269280 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) goto err_out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); if (hdr == NULL) { ret = -EMSGSIZE; diff --git a/net/key/af_key.c b/net/key/af_key.c index 334f93b8cfcb..2ca7d7f6861c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -54,7 +54,7 @@ struct pfkey_sock { struct { uint8_t msg_version; - uint32_t msg_pid; + uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { @@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); @@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); @@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: @@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m c.data.proto = proto; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); @@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms return -EINVAL; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto); @@ -2157,7 +2157,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; - out_hdr->sadb_msg_pid = c->pid; + out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; @@ -2272,7 +2272,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); @@ -2351,7 +2351,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2597,7 +2597,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (err) goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); @@ -2634,7 +2634,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -2663,7 +2663,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb return -EBUSY; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); @@ -2682,7 +2682,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -2711,7 +2711,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6ec3f67ad3f1 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -78,7 +78,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); if (IS_ERR(hdr)) { ret = PTR_ERR(hdr); @@ -87,7 +87,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -235,7 +235,7 @@ out: return ret; } -static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel) { void *hdr; @@ -248,7 +248,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, struct l2tp_stats stats; unsigned int start; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel); if (ret < 0) goto err_out; - return genlmsg_unicast(net, msg, info->snd_pid); + return genlmsg_unicast(net, msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (tunnel == NULL) goto out; - if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel) <= 0) goto out; @@ -604,7 +604,7 @@ out: return ret; } -static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session) { void *hdr; @@ -616,7 +616,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session); if (ret < 0) goto err_out; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback continue; } - if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session) <= 0) break; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9730882697aa..ad39ef406851 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -563,13 +563,13 @@ flag_exist(const struct nlmsghdr *nlh) } static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, +start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); if (nlh == NULL) return NULL; @@ -1045,7 +1045,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; - unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; @@ -1093,7 +1093,7 @@ dump_last: pr_debug("reference set\n"); __ip_set_get(index); } - nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { @@ -1226,7 +1226,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; @@ -1241,7 +1241,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1416,7 +1416,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1428,7 +1428,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1476,7 +1476,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1489,7 +1489,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1525,7 +1525,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -1533,7 +1533,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 767cc12da0fe..0f924bf19c2b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2939,7 +2939,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_SERVICE); if (!hdr) @@ -3128,7 +3128,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DEST); if (!hdr) @@ -3257,7 +3257,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, struct netlink_callback *cb) { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DAEMON); if (!hdr) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index e7be79e640de..de9781b6464f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) goto out_unlock; item.ct = ct; - item.pid = 0; + item.portid = 0; item.report = 0; ret = notify->fcn(events | missed, &item); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a205bd6ce294..59770039b825 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -418,16 +418,16 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -757,7 +757,7 @@ restart: #endif rcu_read_lock(); res = - ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct); @@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else { /* Flush the whole table */ nf_conntrack_flush_report(net, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); return 0; } @@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (del_timer(&ct->timeout)) { if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ @@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1616,7 +1616,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK) | events, - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); } @@ -1638,7 +1638,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK), - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } } @@ -1648,15 +1648,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1708,7 +1708,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -1734,16 +1734,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1776,14 +1776,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2073,15 +2073,15 @@ nla_put_failure: } static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2132,7 +2132,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2147,7 +2147,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -2190,7 +2190,7 @@ restart: cb->args[1] = 0; } if (ctnetlink_exp_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { @@ -2283,14 +2283,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2344,7 +2344,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2366,7 +2366,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2382,7 +2382,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2447,7 +2447,7 @@ static int ctnetlink_create_expect(struct net *net, u16 zone, const struct nlattr * const cda[], u_int8_t u3, - u32 pid, int report) + u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2560,7 +2560,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, pid, report); + err = nf_ct_expect_related_report(exp, portid, report); err_out: nf_ct_expect_put(exp); out: @@ -2603,7 +2603,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, u3, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); } return err; @@ -2618,15 +2618,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2665,7 +2665,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; st = per_cpu_ptr(net->ct.stat, cpu); - if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d7ec92879071..589d686f0b4c 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, } static int -nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; event |= NFNL_SUBSYS_ACCT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur) < 0) { @@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur); @@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 32a1ba3f3e27..3678073360a3 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -395,16 +395,16 @@ nla_put_failure: } static int -nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event |= NFNL_SUBSYS_CTHELPER << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -468,7 +468,7 @@ restart: cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { @@ -538,7 +538,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); @@ -547,7 +547,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index cdecbc8fe965..8847b4d8be06 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -155,16 +155,16 @@ err_proto_put: } static int -ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { @@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); @@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index be194b144297..8cb67c4dbd62 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -56,7 +56,7 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_pid; /* PID of the peer process */ + int peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -133,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) +instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -164,7 +164,7 @@ instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); inst->peer_user_ns = user_ns; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; @@ -336,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -703,7 +703,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -712,7 +712,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -774,7 +774,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_lookup_get(group_num); - if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { + if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } @@ -788,7 +788,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); @@ -947,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", inst->group_num, - inst->peer_pid, inst->qlen, + inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, atomic_read(&inst->use)); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..b8d9995b76a8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -44,7 +44,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_pid; + int peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) +instance_create(u_int16_t queue_num, int portid) { struct nfqnl_instance *inst; unsigned int h; @@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid) } inst->queue_num = queue_num; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; @@ -440,7 +440,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) { + if (!queue->peer_portid) { err = -EINVAL; goto err_out_free_nskb; } @@ -459,7 +459,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -616,7 +616,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -625,7 +625,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -650,7 +650,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) { struct nfqnl_instance *queue; @@ -658,7 +658,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) if (!queue) return ERR_PTR(-ENODEV); - if (queue->peer_pid != nlpid) + if (queue->peer_portid != nlportid) return ERR_PTR(-EPERM); return queue; @@ -698,7 +698,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -749,7 +749,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, queue = instance_lookup(queue_num); if (!queue) - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -832,7 +832,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; } @@ -844,7 +844,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); + queue = instance_create(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -1016,7 +1016,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", inst->queue_num, - inst->peer_pid, inst->queue_total, + inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 6bf878335d94..c15042f987bd 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4809e2e48b02..c5384ffc6146 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) @@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..b7944413b404 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, char *secctx; u32 secctx_len; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d348e97e131..0f2e3ad69c47 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,8 +67,8 @@ struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 pid; - u32 dst_pid; + u32 portid; + u32 dst_portid; u32 dst_group; u32 flags; u32 subscriptions; @@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_pid_hash { +struct nl_portid_hash { struct hlist_head *table; unsigned long rehash_time; @@ -118,7 +118,7 @@ struct nl_pid_hash { }; struct netlink_table { - struct nl_pid_hash hash; + struct nl_portid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; unsigned int flags; @@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) { - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; + return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } static void netlink_destroy_callback(struct netlink_callback *cb) @@ -239,17 +239,17 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { + if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -260,7 +260,7 @@ found: return sk; } -static struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_portid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_portid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size) free_pages((unsigned long)table, get_order(size)); } -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) +static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) { unsigned int omask, mask, shift; size_t osize, size; @@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) size *= 2; } - table = nl_pid_hash_zalloc(size); + table = nl_portid_hash_zalloc(size); if (!table) return 0; @@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) struct hlist_node *node, *tmp; sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); + __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } - nl_pid_hash_free(otable, osize); + nl_portid_hash_free(otable, osize); hash->rehash_time = jiffies + 10 * 60 * HZ; return 1; } -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) +static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) { int avg = hash->entries >> hash->shift; - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) + if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) return 1; if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); + nl_portid_hash_rehash(hash, 0); return 1; } @@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) int len; netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) + if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } @@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) goto err; err = -EBUSY; - if (nlk_sk(sk)->pid) + if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) goto err; - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); + if (len && nl_portid_hash_dilute(hash, len)) + head = nl_portid_hashfn(hash, portid); hash->entries++; - nlk_sk(sk)->pid = pid; + nlk_sk(sk)->portid = portid; sk_add_node(sk, head); err = 0; @@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid) { + if (nlk->portid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, - .pid = nlk->pid, + .portid = nlk->portid, }; atomic_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); @@ -559,24 +559,24 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = task_tgid_vnr(current); + s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(osk, node, head) { if (!net_eq(sock_net(osk), net)) continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; + if (nlk_sk(osk)->portid == portid) { + /* Bind collision, search negative portid values. */ + portid = rover--; if (rover > -4097) rover = -4097; netlink_table_ungrab(); @@ -585,7 +585,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, net, pid); + err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) goto retry; @@ -668,8 +668,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) + if (nlk->portid) { + if (nladdr->nl_pid != nlk->portid) return -EINVAL; } else { err = nladdr->nl_pid ? @@ -715,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; + nlk->dst_portid = 0; nlk->dst_group = 0; return 0; } @@ -726,12 +726,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->pid) + if (!nlk->portid) err = netlink_autobind(sock); if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; + nlk->dst_portid = nladdr->nl_pid; nlk->dst_group = ffs(nladdr->nl_groups); } @@ -750,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*nladdr); if (peer) { - nladdr->nl_pid = nlk->dst_pid; + nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->pid; + nladdr->nl_pid = nlk->portid; nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; @@ -772,19 +772,19 @@ static void netlink_overrun(struct sock *sk) atomic_inc(&sk->sk_drops); } -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { + nlk->dst_portid != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -935,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) + u32 portid, int nonblock) { struct sock *sk; int err; @@ -945,7 +945,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, timeo = sock_sndtimeo(ssk, nonblock); retry: - sk = netlink_getsockbypid(ssk, pid); + sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); @@ -1005,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; - u32 pid; + u32 portid; u32 group; int failure; int delivery_failure; @@ -1026,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1078,7 +1078,7 @@ out: return 0; } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) @@ -1092,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, info.exclude_sk = ssk; info.net = net; - info.pid = pid; + info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; @@ -1130,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, } EXPORT_SYMBOL(netlink_broadcast_filtered); -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { - return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; - u32 pid; + u32 portid; u32 group; int code; }; @@ -1156,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1174,14 +1174,14 @@ out: /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) + * @portid: the PORTID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; @@ -1189,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) int ret = 0; info.exclude_sk = ssk; - info.pid = pid; + info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; @@ -1354,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; + u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; @@ -1374,18 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = -EINVAL; if (addr->nl_family != AF_NETLINK) goto out; - dst_pid = addr->nl_pid; + dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if ((dst_group || dst_pid) && + if ((dst_group || dst_portid) && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { - dst_pid = nlk->dst_pid; + dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; } - if (!nlk->pid) { + if (!nlk->portid) { err = netlink_autobind(sock); if (err) goto out; @@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; @@ -1417,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: scm_destroy(siocb->scm); @@ -1482,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; addr->nl_family = AF_NETLINK; addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } @@ -1683,7 +1683,7 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) } struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -1692,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; - nlh->nlmsg_pid = pid; + nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); @@ -1788,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1836,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); + NETLINK_CB(in_skb).portid); if (sk) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); @@ -1845,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) return; } - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); @@ -1900,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb); * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message - * @pid: destination netlink pid for reports or 0 + * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { - int exclude_pid = 0; + int exclude_portid = 0; if (report) { atomic_inc(&skb->users); - exclude_pid = pid; + exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ - err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); } if (report) { int err2; - err2 = nlmsg_unicast(sk, skb, pid); + err2 = nlmsg_unicast(sk, skb, portid); if (!err || err == -ESRCH) err = err2; } @@ -1951,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { @@ -1999,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) j = iter->hash_idx + 1; do { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); @@ -2038,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, - nlk->pid, + nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), @@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void) order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); + hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, + nl_portid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); goto panic; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 19288b7d6135..f2aabb6f4105 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message - * @pid: netlink pid the message is addressed to + * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags @@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family); * * Returns pointer to user specific header */ -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; - nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; @@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } info.snd_seq = nlh->nlmsg_seq; - info.snd_pid = NETLINK_CB(skb).pid; + info.snd_portid = NETLINK_CB(skb).portid; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; @@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = { .netnsok = true, }; -static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, +static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -701,7 +701,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { @@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, struct nlattr *nla_grps; struct nlattr *nest; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++n < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY) < 0) goto errout; @@ -773,7 +773,7 @@ errout: } static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); + err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return -ENOENT; } - msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -971,7 +971,7 @@ problem: subsys_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; @@ -986,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - pid, group, flags); + portid, group, flags); if (err) goto error; } @@ -994,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); error: kfree_skb(skb); return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_mcast(skb, pid, group, flags); + return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, +void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1016,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, pid, group, report, flags); + nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..4bbb70e32d1e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; @@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) @@ -347,13 +347,13 @@ free_msg: } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, - u32 pid, u32 seq, + u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; @@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, while (dev) { int rc; - rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid, + rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; @@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) goto out_putdev; } - rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq, + rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; @@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) - dev->genl_data.poll_req_pid = info->snd_pid; + dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - if (dev->genl_data.poll_req_pid != info->snd_pid) { + if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -771,15 +771,15 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + if (dev->genl_data.poll_req_portid == n->portid) { nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; } dev = nfc_device_iter_next(&iter); } @@ -792,7 +792,7 @@ out: void nfc_genl_data_init(struct nfc_genl_data *genl_data) { - genl_data->poll_req_pid = 0; + genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0da687769f56..c7425f32e7db 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = NULL; - upcall.pid = 0; + upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.pid = nla_get_u32(a); + upcall.portid = nla_get_u32(a); break; } } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 105a0b5adc51..56327e877ed9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -225,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.pid = p->upcall_pid; + upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -261,7 +261,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, int dp_ifindex; int err; - if (upcall_info->pid == 0) { + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } @@ -395,7 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); @@ -780,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { /* Called with genl_lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, - struct sk_buff *skb, u32 pid, + struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; @@ -795,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -879,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 pid, u32 seq, u8 cmd) + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -888,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } @@ -970,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->hash = ovs_flow_hash(&key, key_len); ovs_flow_tbl_insert(table, flow); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { @@ -1008,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_flow_deferred_free_acts(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ @@ -1020,7 +1020,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else @@ -1061,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!flow) return -ENOENT; - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1103,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); ovs_flow_deferred_free(flow); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; } @@ -1137,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; if (ovs_flow_cmd_fill_info(flow, dp, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) break; @@ -1191,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = { }; static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; @@ -1222,7 +1222,7 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1232,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1311,7 +1311,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1322,7 +1322,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1332,7 +1332,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; @@ -1394,7 +1394,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return err; - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1402,7 +1402,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) __dp_destroy(dp); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1419,7 +1419,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); @@ -1428,7 +1428,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) return 0; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1444,7 +1444,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1461,7 +1461,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && - ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_NEW) < 0) break; @@ -1521,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Called with RTNL lock or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -1537,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid)) + nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1559,7 +1559,7 @@ error: } /* Called with RTNL lock or RCU read lock. */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1569,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1661,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1707,9 +1707,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (err) goto exit_unlock; if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { netlink_set_err(sock_net(skb->sk)->genl_sock, 0, @@ -1717,7 +1717,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1743,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1751,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1773,7 +1773,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1808,7 +1808,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_NEW) < 0) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 129ec5480758..031dfbf37c93 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -129,7 +129,7 @@ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; - u32 pid; + u32 portid; }; static inline struct net *ovs_dp_get_net(struct datapath *dp) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 1abd9609ba78..9055dd698c70 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -125,7 +125,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_pid = parms->upcall_pid; + vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c56e4836e93b..3f7961ea3c56 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @upcall_pid: The Netlink port to use for packets received on this port that + * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -83,7 +83,7 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - u32 upcall_pid; + u32 upcall_portid; struct hlist_node hash_node; struct hlist_node dp_hash_node; @@ -113,7 +113,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_pid; + u32 upcall_portid; }; /** diff --git a/net/packet/diag.c b/net/packet/diag.c index 39bce0d50df9..8db6e21c46bd 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -126,13 +126,13 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; struct packet_sock *po = pkt_sk(sk); - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); if (!nlh) return -EMSGSIZE; @@ -184,7 +184,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, sock_i_ino(sk)) < 0) goto done; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7dd762a464e5..83a8389619aa 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -33,7 +33,7 @@ /* Device address handling */ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event); + u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) { @@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) } static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; } @@ -165,12 +165,12 @@ out: /* Routes handling */ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE)) goto out; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e3d2c78cb52c..102761d294cb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -644,7 +644,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); @@ -678,7 +678,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, +act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; @@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnl_unicast(skb, net, pid); + return rtnl_unicast(skb, net, portid); } static struct tc_action * -tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; @@ -762,7 +762,7 @@ static struct tc_action *create_a(int i) } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 pid) + struct nlmsghdr *n, u32 portid) { struct sk_buff *skb; unsigned char *b; @@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -841,7 +841,7 @@ noflush_out: static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int event) + u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, pid); + return tca_action_flush(net, tb[1], n, portid); else return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(tb[i], n, pid); + act = tcf_action_get_1(tb[i], n, portid); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, pid, n, head, event); + ret = act_get_notify(net, portid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; @@ -905,7 +905,7 @@ err: } static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_kfree_skb; t = nlmsg_data(nlh); @@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -953,7 +953,7 @@ out_kfree_skb: static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int ovr) + u32 portid, int ovr) { int ret = 0; struct tc_action *act; @@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_DELACTION); + portid, RTM_DELACTION); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_GETACTION); + portid, RTM_GETACTION); break; default: BUG(); @@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto out_module_put; } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; @@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_cancel(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; - if (NETLINK_CB(cb->skb).pid && ret) + if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc3ef5aef355..7ae02892437c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -343,13 +343,13 @@ errout: } static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 pid, u32 seq, u16 flags, int event) + unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, unsigned long fh, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, { struct tcf_dump_args *a = (void *)arg; - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a08b4ab3e421..a18d975db59c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1185,7 +1185,7 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: @@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } else { if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, continue; } if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 pid = tcm->tcm_parent; + u32 portid = tcm->tcm_parent; u32 clid = tcm->tcm_handle; u32 qid = TC_H_MAJ(clid); int err; @@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - if (pid != TC_H_ROOT) { - u32 qid1 = TC_H_MAJ(pid); + if (portid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ @@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now qid is genuine qdisc handle consistent * both with parent and child. * - * TC_H_MAJ(pid) still may be unspecified, complete it now. + * TC_H_MAJ(portid) still may be unspecified, complete it now. */ - if (pid) - pid = TC_H_MAKE(qid, pid); + if (portid) + portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = dev->qdisc->handle; @@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now try to get class */ if (clid == 0) { - if (pid == TC_H_ROOT) + if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); @@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, pid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl); if (err == 0) tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); @@ -1492,7 +1492,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, unsigned long cl, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; - return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 47a839df27dc..6675914dc592 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); } return 0; diff --git a/net/unix/diag.c b/net/unix/diag.c index 750b13408449..06748f108a57 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags); if (!nlh) return -EMSGSIZE; @@ -159,7 +159,7 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(req->udiag_states & (1 << sk->sk_state))) goto next; if (sk_diag_dump(sk, skb, req, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) goto done; @@ -267,7 +267,7 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); @@ -277,7 +277,7 @@ again: goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index bc7430b54771..a343be4a52bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -55,7 +55,7 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; - u32 ap_beacons_nlpid; + u32 ap_beacons_nlportid; /* protected by RTNL only */ int num_running_ifaces; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8fd0242ee169..ec7fcee5bad6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -615,7 +615,7 @@ EXPORT_SYMBOL(cfg80211_del_sta); struct cfg80211_mgmt_registration { struct list_head list; - u32 nlpid; + u32 nlportid; int match_len; @@ -624,7 +624,7 @@ struct cfg80211_mgmt_registration { u8 match[]; }; -int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) { @@ -672,7 +672,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; - nreg->nlpid = snd_pid; + nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); @@ -685,7 +685,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, return err; } -void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -694,7 +694,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid != nlpid) + if (reg->nlportid != nlportid) continue; if (rdev->ops->mgmt_frame_register) { @@ -710,8 +710,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_unlock_bh(&wdev->mgmt_registrations_lock); - if (nlpid == wdev->ap_unexpected_nlpid) - wdev->ap_unexpected_nlpid = 0; + if (nlportid == wdev->ap_unexpected_nlportid) + wdev->ap_unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -872,7 +872,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, freq, sig_mbm, buf, len, gfp)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 787aeaa902fe..34eb5c07a567 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr) } /* message building helper */ -static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ - return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); + return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_channel(struct sk_buff *msg, @@ -851,7 +851,7 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; @@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; @@ -1267,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) { idx--; @@ -1290,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1736,14 +1736,14 @@ static inline u64 wdev_id(struct wireless_dev *wdev) ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); } -static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) return -1; @@ -1807,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; continue; } - if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { mutex_unlock(&rdev->devlist_mtx); @@ -1838,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, dev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2056,7 +2056,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2191,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2769,7 +2769,7 @@ nla_put_failure: return false; } -static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -2778,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -2931,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev, netdev, mac_addr, &sinfo) < 0) @@ -2977,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3303,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr); } -static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) @@ -3311,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *pinfoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -3389,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, dst, next_hop, &pinfo) < 0) @@ -3438,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3679,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; @@ -3998,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) goto put_failure; @@ -4616,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, ASSERT_WDEV_LOCK(wdev); - hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags, + hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); if (!hdr) return -1; @@ -4735,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return skb->len; } -static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, struct survey_info *survey) { void *hdr; struct nlattr *infoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) return -ENOMEM; @@ -4836,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, } if (nl80211_send_survey(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, &survey) < 0) @@ -5451,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } while (1) { - void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid, + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_TESTMODE); struct nlattr *tmdata; @@ -5491,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, static struct sk_buff * __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 pid, u32 seq, gfp_t gfp) + int approxlen, u32 portid, u32 seq, gfp_t gfp) { struct sk_buff *skb; void *hdr; @@ -5501,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, if (!skb) return NULL; - hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); if (!hdr) { kfree_skb(skb); return NULL; @@ -5531,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, return NULL; return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_portid, rdev->testmode_info->snd_seq, GFP_KERNEL); } @@ -5867,7 +5867,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (IS_ERR(hdr)) { @@ -6086,7 +6086,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6167,7 +6167,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (IS_ERR(hdr)) { @@ -6284,7 +6284,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; @@ -6486,7 +6486,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; @@ -6760,10 +6760,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; - if (wdev->ap_unexpected_nlpid) + if (wdev->ap_unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlpid = info->snd_pid; + wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } @@ -6793,7 +6793,7 @@ static int nl80211_probe_client(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (IS_ERR(hdr)) { @@ -6828,10 +6828,10 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; - if (rdev->ap_beacons_nlpid) + if (rdev->ap_beacons_nlportid) return -EBUSY; - rdev->ap_beacons_nlpid = info->snd_pid; + rdev->ap_beacons_nlportid = info->snd_portid; return 0; } @@ -7628,12 +7628,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -7657,11 +7657,11 @@ static int nl80211_send_sched_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct net_device *netdev, - u32 pid, u32 seq, int flags, u32 cmd) + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -8370,9 +8370,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct sk_buff *msg; void *hdr; int err; - u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); - if (!nlpid) + if (!nlportid) return false; msg = nlmsg_new(100, gfp); @@ -8396,7 +8396,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; nla_put_failure: @@ -8420,7 +8420,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlportid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { @@ -8449,7 +8449,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, genlmsg_end(msg, hdr); - return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -8804,9 +8804,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid); - if (!nlpid) + if (!nlportid) return; msg = nlmsg_new(len + 100, gfp); @@ -8829,7 +8829,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, genlmsg_end(msg, hdr); - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: @@ -8853,9 +8853,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) - cfg80211_mlme_unregister_socket(wdev, notify->pid); - if (rdev->ap_beacons_nlpid == notify->pid) - rdev->ap_beacons_nlpid = 0; + cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (rdev->ap_beacons_nlportid == notify->portid) + rdev->ap_beacons_nlportid = 0; } rcu_read_unlock(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7856c33898fa..30edad44e7fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) { @@ -1674,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c) EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); @@ -1726,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 354070adb5ef..b313d932d678 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -603,7 +603,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -676,7 +676,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -826,7 +826,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -904,7 +904,7 @@ static inline size_t xfrm_spdinfo_msgsize(void) } static int build_spdinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -913,7 +913,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -946,17 +946,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -967,7 +967,7 @@ static inline size_t xfrm_sadinfo_msgsize(void) } static int build_sadinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -975,7 +975,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -1003,17 +1003,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1033,7 +1033,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1114,7 +1114,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1401,7 +1401,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1486,7 +1486,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -1621,7 +1621,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); } } else { uid_t loginuid = audit_get_loginuid(current); @@ -1638,7 +1638,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } @@ -1668,7 +1668,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); @@ -1695,7 +1695,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1777,11 +1777,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1827,7 +1827,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1862,7 +1862,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2370,7 +2370,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2429,7 +2429,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2497,7 +2497,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2696,7 +2696,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2756,7 +2756,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2810,7 +2810,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; -- cgit v1.2.1 From c6bb8136c95ce16545f8c4028e958c0ee6c86f23 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Sep 2012 21:17:17 +0000 Subject: xfrm: Report user triggered expirations against the users socket When a policy expiration is triggered from user space the request travels through km_policy_expired and ultimately into xfrm_exp_policy_notify which calls build_polexpire. build_polexpire uses the netlink port passed to km_policy_expired as the source port for the netlink message it builds. When a state expiration is triggered from user space the request travles through km_state_expired and ultimately into xfrm_exp_state_notify which calls build_expire. build_expire uses the netlink port passed to km_state_expired as the source port for the netlink message it builds. Pass nlh->nlmsg_pid from the user generated netlink message that requested the expiration to km_policy_expired and km_state_expired instead of current->pid which is not a netlink port number. Cc: Jamal Hadi Salim Signed-off-by: "Eric W. Biederman" Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b313d932d678..5d6eb4b3c089 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1930,7 +1930,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, // reset the timers here? WARN(1, "Dont know what to do with soft policy expire\n"); } - km_policy_expired(xp, p->dir, up->hard, current->pid); + km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); out: xfrm_pol_put(xp); @@ -1958,7 +1958,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; if (x->km.state != XFRM_STATE_VALID) goto out; - km_state_expired(x, ue->hard, current->pid); + km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { uid_t loginuid = audit_get_loginuid(current); -- cgit v1.2.1 From b6069a95706ca5738be3f5d90fd286cbd13ac695 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Sep 2012 22:03:35 +0000 Subject: filter: add MOD operation Add a new ALU opcode, to compute a modulus. Commit ffe06c17afbbb used an ancillary to implement XOR_X, but here we reserve one of the available ALU opcode to implement both MOD_X and MOD_K Signed-off-by: Eric Dumazet Suggested-by: George Bakos Cc: Jay Schulist Cc: Jiri Pirko Cc: Andi Kleen Signed-off-by: David S. Miller --- net/core/filter.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 907efd27ec77..fbe3a8d12570 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -167,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb, case BPF_S_ALU_DIV_K: A = reciprocal_divide(A, K); continue; + case BPF_S_ALU_MOD_X: + if (X == 0) + return 0; + A %= X; + continue; + case BPF_S_ALU_MOD_K: + A %= K; + continue; case BPF_S_ALU_AND_X: A &= X; continue; @@ -469,6 +477,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, + [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, + [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, @@ -531,6 +541,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) return -EINVAL; ftest->k = reciprocal_value(ftest->k); break; + case BPF_S_ALU_MOD_K: + /* check for division by zero */ + if (ftest->k == 0) + return -EINVAL; + break; case BPF_S_LD_MEM: case BPF_S_LDX_MEM: case BPF_S_ST: -- cgit v1.2.1 From 566f26aa705609d05940289036ab914c8a3be707 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 9 Sep 2012 18:38:27 +0000 Subject: caif: move the dereference below the NULL test The dereference should be moved below the NULL test. spatch with a semantic match is used to found this. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/caif/cfsrvl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index dd485f6128e8..ba217e90765e 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -211,9 +211,10 @@ void caif_client_register_refcnt(struct cflayer *adapt_layer, void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; - service = container_of(adapt_layer->dn, struct cfsrvl, layer); - WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL); + if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL)) + return; + service = container_of(adapt_layer->dn, struct cfsrvl, layer); service->hold = hold; service->put = put; } -- cgit v1.2.1 From fdd6681d92a70b3db73cdb61c6b4053f2f8003b3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 10 Sep 2012 02:48:44 +0000 Subject: ipv6: remove some useless RCU read lock After this commit: commit 97cac0821af4474ec4ba3a9e7a36b98ed9b6db88 Author: David S. Miller Date: Mon Jul 2 22:43:47 2012 -0700 ipv6: Store route neighbour in rt6_info struct. we no longer use RCU to protect route neighbour. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 13 ++----------- net/ipv6/route.c | 15 ++------------- 2 files changed, 4 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a4f6263fddca..3dd4a37488d5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - rcu_read_lock(); rt = (struct rt6_info *) dst; neigh = rt->n; - if (neigh) { - int res = dst_neigh_output(dst, neigh, skb); + if (neigh) + return dst_neigh_output(dst, neigh, skb); - rcu_read_unlock(); - return res; - } - rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -983,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rcu_read_lock(); rt = (struct rt6_info *) *dst; n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { @@ -991,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi6 fl_gw6; int redirect; - rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1011,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } - } else { - rcu_read_unlock(); } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a81c6790a648..399613b7972f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -451,10 +451,9 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - rcu_read_lock(); neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - goto out; + return; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -470,8 +469,6 @@ static void rt6_probe(struct rt6_info *rt) } else { read_unlock_bh(&neigh->lock); } -out: - rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -498,7 +495,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; int m; - rcu_read_lock(); neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) @@ -516,7 +512,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; - rcu_read_unlock(); return m; } @@ -2496,15 +2491,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - rcu_read_lock(); n = rt->n; if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { - rcu_read_unlock(); + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) goto nla_put_failure; - } } - rcu_read_unlock(); if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@ -2706,14 +2697,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - rcu_read_lock(); n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } - rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, -- cgit v1.2.1 From 1c463e57b32e3b6a3250fe75d1d56cb598d664e6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 10 Sep 2012 09:13:07 -0700 Subject: net: fix net/core/sock.c build error Fix net/core/sock.c build error when CONFIG_INET is not enabled: net/built-in.o: In function `sock_edemux': (.text+0xd396): undefined reference to `inet_twsk_put' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 7f64467535d1..305792076121 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1525,9 +1525,11 @@ void sock_edemux(struct sk_buff *skb) { struct sock *sk = skb->sk; +#ifdef CONFIG_INET if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_put(inet_twsk(sk)); else +#endif sock_put(sk); } EXPORT_SYMBOL(sock_edemux); -- cgit v1.2.1 From 3a6a0d8ee88d23e7dda28808c2c890c4db50ccb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Sep 2012 17:25:27 +0200 Subject: mac80211: remove unneeded CONFIG_PM ifdef The functions are only called if CONFIG_PM is set as the callers are under an ifdef, so there's no need to also define no-op functions. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 887452327ba8..d11a6f837d69 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1368,7 +1368,6 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, int ieee80211_reconfig(struct ieee80211_local *local); void ieee80211_stop_device(struct ieee80211_local *local); -#ifdef CONFIG_PM int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); @@ -1382,18 +1381,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) return ieee80211_reconfig(hw_to_local(hw)); } -#else -static inline int __ieee80211_suspend(struct ieee80211_hw *hw, - struct cfg80211_wowlan *wowlan) -{ - return 0; -} - -static inline int __ieee80211_resume(struct ieee80211_hw *hw) -{ - return 0; -} -#endif /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ -- cgit v1.2.1 From bdfc87f7d1e253e0a61e2fc6a75ea9d76f7fc03a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Sep 2012 13:11:12 +0000 Subject: net-sched: sch_cbq: avoid infinite loop Its possible to setup a bad cbq configuration leading to an infinite loop in cbq_classify() DEV_OUT=eth0 ICMP="match ip protocol 1 0xff" U32="protocol ip u32" DST="match ip dst" tc qdisc add dev $DEV_OUT root handle 1: cbq avpkt 1000 \ bandwidth 100mbit tc class add dev $DEV_OUT parent 1: classid 1:1 cbq \ rate 512kbit allot 1500 prio 5 bounded isolated tc filter add dev $DEV_OUT parent 1: prio 3 $U32 \ $ICMP $DST 192.168.3.234 flowid 1: Reported-by: Denys Fedoryschenko Tested-by: Denys Fedoryschenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6aabd77d1cfd..564b9fc8efd3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL || cl->level >= head->level) + if (cl == NULL) goto fallback; } - + if (cl->level >= head->level) + goto fallback; #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: -- cgit v1.2.1 From 16af511a666827eaf5802144f09e2fb7b0942c99 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 02:04:53 +0000 Subject: netfilter: log: Fix log-level processing auto75914331@hushmail.com reports that iptables does not correctly output the KERN_. $IPTABLES -A RULE_0_in -j LOG --log-level notice --log-prefix "DENY in: " result with linux 3.6-rc5 Sep 12 06:37:29 xxxxx kernel: <5>DENY in: IN=eth0 OUT= MAC=....... result with linux 3.5.3 and older: Sep 9 10:43:01 xxxxx kernel: DENY in: IN=eth0 OUT= MAC...... commit 04d2c8c83d0 ("printk: convert the format for KERN_ to a 2 byte pattern") updated the syslog header style but did not update netfilter uses. Do so. Use KERN_SOH and string concatenation instead of "%c" KERN_SOH_ASCII as suggested by Eric Dumazet. Signed-off-by: Joe Perches cc: auto75914331@hushmail.com Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 2 +- net/netfilter/xt_LOG.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f88ee537fb2b..92de5e5f9db2 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -80,7 +80,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, unsigned int bitmask; spin_lock_bh(&ebt_log_lock); - printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", + printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : "", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 2a4f9693e799..91e9af4d1f42 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -443,8 +443,8 @@ log_packet_common(struct sbuff *m, const struct nf_loginfo *loginfo, const char *prefix) { - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, + sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.1 From c7cbb9173d3c6d41cbfbca451902d66fe6440cbb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 11 Sep 2012 17:46:19 +0200 Subject: netfilter: ctnetlink: fix module auto-load in ctnetlink_parse_nat (c7232c9 netfilter: add protocol independent NAT core) added incorrect locking for the module auto-load case in ctnetlink_parse_nat. That function is always called from ctnetlink_create_conntrack which requires no locking. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a205bd6ce294..090d267ee605 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1120,16 +1120,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); - spin_unlock_bh(&nf_conntrack_lock); nfnl_unlock(); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { nfnl_lock(); - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); return -EOPNOTSUPP; } nfnl_lock(); - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); #else err = -EOPNOTSUPP; -- cgit v1.2.1 From e29fe837bfa3ca0a9f4ef1d4a90e6e0a74b6b8a0 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:32 +0000 Subject: net_sched: gred: correct comment about qavg calculation in RIO mode Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e901583e4ea5..fca73cdf44d9 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -176,7 +176,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } - /* sum up all the qaves of prios <= to ours to get the new qave */ + /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; -- cgit v1.2.1 From c22e464022f935b0cbd8724b1d99d800d49518a9 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:33 +0000 Subject: net_sched: gred: eliminate redundant DP prio comparisons Each pair of DPs only needs to be compared once when searching for a non-unique prio value. Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fca73cdf44d9..e19d4ebfea1c 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -102,9 +102,8 @@ static inline int gred_wred_mode_check(struct Qdisc *sch) if (q == NULL) continue; - for (n = 0; n < table->DPs; n++) - if (table->tab[n] && table->tab[n] != q && - table->tab[n]->prio == q->prio) + for (n = i + 1; n < table->DPs; n++) + if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } -- cgit v1.2.1 From 1fe37b106b039d9358fd1211c39b1fa199e547a8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:34 +0000 Subject: net_sched: gred: fix qave reporting via netlink q->vars.qavg is a Wlog scaled value, but q->backlog is not. In order to pass q->vars.qavg as the backlog value, we need to un-scale it. Additionally, the qave value returned via netlink should not be Wlog scaled, so we need to un-scale the result of red_calc_qavg(). This caused artificially high values for "Average Queue" to be shown by 'tc -s -d qdisc', but did not affect the actual operation of GRED. Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e19d4ebfea1c..b2570b59d85e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -534,6 +534,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; + unsigned long qavg; memset(&opt, 0, sizeof(opt)); @@ -565,7 +566,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_wred_mode(table)) gred_load_wred_set(table, q); - opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); + qavg = red_calc_qavg(&q->parms, &q->vars, + q->vars.qavg >> q->parms.Wlog); + opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) -- cgit v1.2.1 From ba1bf474eae07a128fae6252bf7d68eaef0eacf8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:35 +0000 Subject: net_sched: gred: actually perform idling in WRED mode gred_dequeue() and gred_drop() do not seem to get called when the queue is empty, meaning that we never start idling while in WRED mode. And since qidlestart is not stored by gred_store_wred_set(), we would never stop idling while in WRED mode if we ever started. This messes up the average queue size calculation that influences packet marking/dropping behavior. Now, we start WRED mode idling as we are removing the last packet from the queue. Also we now actually stop WRED mode idling when we are enqueuing a packet. Cc: Bruce Osler Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b2570b59d85e..d42234c0f13b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -136,6 +136,7 @@ static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; + table->wred_set.qidlestart = q->vars.qidlestart; } static inline int gred_use_ecn(struct gred_sched *t) @@ -259,16 +260,18 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch) } else { q->backlog -= qdisc_pkt_len(skb); - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } return skb; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return NULL; } @@ -290,19 +293,20 @@ static unsigned int gred_drop(struct Qdisc *sch) q->backlog -= len; q->stats.other++; - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } qdisc_drop(skb, sch); return len; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return 0; - } static void gred_reset(struct Qdisc *sch) -- cgit v1.2.1 From d530d6df96ee28902486f8e11815ef9ad3a1cd1b Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 12 Sep 2012 20:32:25 +0000 Subject: netprio_cgroup: Remove update_netdev_tables() since it is unnecessary The update_netdev_tables() function appears to be unnecessary, since the write_update_netdev_table() function will adjust the priomaps as and when required anyway. So drop the usage of update_netdev_tables() entirely. Signed-off-by: Srivatsa S. Bhat Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..fd339bb00106 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -109,32 +109,6 @@ static int write_update_netdev_table(struct net_device *dev) return ret; } -static int update_netdev_tables(void) -{ - int ret = 0; - struct net_device *dev; - u32 max_len; - struct netprio_map *map; - - rtnl_lock(); - max_len = atomic_read(&max_prioidx) + 1; - for_each_netdev(&init_net, dev) { - map = rtnl_dereference(dev->priomap); - /* - * don't allocate priomap if we didn't - * change net_prio.ifpriomap (map == NULL), - * this will speed up skb_update_prio. - */ - if (map && map->priomap_len < max_len) { - ret = extend_netdev_table(dev, max_len); - if (ret < 0) - break; - } - } - rtnl_unlock(); - return ret; -} - static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; @@ -153,12 +127,6 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) goto out; } - ret = update_netdev_tables(); - if (ret < 0) { - put_prioidx(cs->prioidx); - goto out; - } - return &cs->css; out: kfree(cs); -- cgit v1.2.1 From f05ba7fccf0c5f0422378adaffcb119d08b9f304 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 12 Sep 2012 20:32:34 +0000 Subject: netprio_cgroup: Use memcpy instead of the for-loop to copy priomap Replace the current (inefficient) for-loop with memcpy, to copy priomap. Signed-off-by: Srivatsa S. Bhat Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index fd339bb00106..45c503e45fc6 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -73,7 +73,6 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len) ((sizeof(u32) * new_len)); struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); struct netprio_map *old_priomap; - int i; old_priomap = rtnl_dereference(dev->priomap); @@ -82,10 +81,10 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len) return -ENOMEM; } - for (i = 0; - old_priomap && (i < old_priomap->priomap_len); - i++) - new_priomap->priomap[i] = old_priomap->priomap[i]; + if (old_priomap) + memcpy(new_priomap->priomap, old_priomap->priomap, + old_priomap->priomap_len * + sizeof(old_priomap->priomap[0])); new_priomap->priomap_len = new_len; -- cgit v1.2.1 From 417962a02ba283c8532d61474dc08e0a966cd269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 10 Sep 2012 18:16:49 +0000 Subject: ipv6: Add labels for site-local and 6bone testing addresses (RFC6724) Added labels for site-local addresses (fec0::/10) and 6bone testing addresses (3ffe::/16) in order to depreference them. Note that the RFC introduced new rows for Teredo, ULA and 6to4 addresses in the default policy table. Some of them have different labels from ours. For backward compatibility, we do not change the "default" labels. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0b0171570d17..4be23da32b89 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) } /* - * Default policy table (RFC3484 + extensions) + * Default policy table (RFC6724 + extensions) * * prefix addr_type label * ------------------------------------------------------------------------- @@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) * 2001:10::/28 N/A 7 ORCHID (RFC 4843) + * fec0::/10 N/A 11 Site-local + * (deprecated by RFC3879) + * 3ffe::/16 N/A 12 6bone * * Note: 0xffffffff is used if we do not have any policies. + * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. */ #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL @@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table .prefix = &(struct in6_addr){{{ 0xfc }}}, .prefixlen = 7, .label = 5, + },{ /* fec0::/10 */ + .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + .prefixlen = 10, + .label = 11, },{ /* 2002::/16 */ .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, .prefixlen = 16, .label = 2, + },{ /* 3ffe::/16 */ + .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + .prefixlen = 16, + .label = 12, },{ /* 2001::/32 */ .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, .prefixlen = 32, -- cgit v1.2.1 From 91b4b04ff85de9086c959138d747d2808cc83a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 10 Sep 2012 18:41:07 +0000 Subject: ipv6: Compare addresses only bits up to the prefix length (RFC6724). Compare bits up to the source address's prefix length only to allows DNS load balancing to continue to be used as a tie breaker. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1237d5d037d8..5fd8ec895c8b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1070,8 +1070,10 @@ static int ipv6_get_saddr_eval(struct net *net, break; case IPV6_SADDR_RULE_PREFIX: /* Rule 8: Use longest matching prefix */ - score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, - dst->addr); + ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); + if (ret > score->ifa->prefix_len) + ret = score->ifa->prefix_len; + score->matchlen = ret; break; default: ret = 0; -- cgit v1.2.1 From fb0af4c74f200e3c4846d91d8f2f8b265450bba7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Sep 2012 21:47:51 +0000 Subject: ipv6: route templates can be const We kmemdup() templates, so they can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 399613b7972f..f568ac697987 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -222,7 +222,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 255, }; -static struct rt6_info ip6_null_entry_template = { +static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, @@ -242,7 +242,7 @@ static struct rt6_info ip6_null_entry_template = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static struct rt6_info ip6_prohibit_entry_template = { +static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, @@ -257,7 +257,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .rt6i_ref = ATOMIC_INIT(1), }; -static struct rt6_info ip6_blk_hole_entry_template = { +static const struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, -- cgit v1.2.1 From 5744dd9b71c6b9df944c6a32a000d4a564a2abd7 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 11 Sep 2012 21:59:01 +0000 Subject: ipv6: replace write lock with read lock when get route info geting route info does not write rt->rt6i_table, so replace write lock with read lock Suggested-by: Eric Dumazet Signed-off-by: Li RongQing Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- net/ipv6/route.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5fd8ec895c8b..719a828fb67f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1708,7 +1708,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (table == NULL) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; @@ -1723,7 +1723,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f568ac697987..83dafa528936 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1837,7 +1837,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1853,7 +1853,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } @@ -1896,7 +1896,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && @@ -1905,7 +1905,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev } if (rt) dst_hold(&rt->dst); - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } -- cgit v1.2.1 From 6af773e786ad617b0264ebe06ba60675c01f3e51 Mon Sep 17 00:00:00 2001 From: Nishank Trivedi Date: Wed, 12 Sep 2012 13:32:49 +0000 Subject: pktgen: fix crash with vlan and packet size less than 46 If vlan option is being specified in the pktgen and packet size being requested is less than 46 bytes, despite being illogical request, pktgen should not crash the kernel. BUG: unable to handle kernel paging request at ffff88021fb82000 Process kpktgend_0 (pid: 1184, threadinfo ffff880215f1a000, task ffff880218544530) Call Trace: [] ? pktgen_finalize_skb+0x222/0x300 [pktgen] [] ? build_skb+0x34/0x1c0 [] pktgen_thread_worker+0x5d1/0x1790 [pktgen] [] ? igb_xmit_frame_ring+0xa30/0xa30 [igb] [] ? wake_up_bit+0x40/0x40 [] ? wake_up_bit+0x40/0x40 [] ? spin+0x240/0x240 [pktgen] [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? flush_kthread_worker+0x80/0x80 [] ? gs_change+0x13/0x13 The root cause of why pktgen is not able to handle this case is due to comparison of signed (datalen) and unsigned data (sizeof), which eventually passes a huge number to skb_put(). Signed-off-by: Nishank Trivedi Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index cce9e53528b1..148e73d2c451 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2721,7 +2721,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; - if (datalen < sizeof(struct pktgen_hdr)) + if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); udph->source = htons(pkt_dev->cur_udp_src); -- cgit v1.2.1 From c6089735e7243a10faad676680c6e18d50959f74 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 May 2012 16:37:54 -0600 Subject: userns: net: Call key_alloc with GLOBAL_ROOT_UID, GLOBAL_ROOT_GID instead of 0, 0 In net/dns_resolver/dns_key.c and net/rxrpc/ar-key.c make them work with user namespaces enabled where key_alloc takes kuids and kgids. Pass GLOBAL_ROOT_UID and GLOBAL_ROOT_GID instead of bare 0's. Cc: Sage Weil Cc: ceph-devel@vger.kernel.org Cc: David Howells Cc: David Miller Cc: linux-afs@lists.infradead.org Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- net/dns_resolver/dns_key.c | 3 ++- net/rxrpc/ar-key.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..9807945a56d9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -259,7 +259,8 @@ static int __init init_dns_resolver(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + keyring = key_alloc(&key_type_keyring, ".dns_resolver", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_NOT_IN_QUOTA); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 8b1f9f49960f..011d2384b115 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -948,7 +948,8 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, + key = key_alloc(&key_type_rxrpc, "x", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -994,7 +995,8 @@ struct key *rxrpc_get_null_key(const char *keyname) struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, + key = key_alloc(&key_type_rxrpc, keyname, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; -- cgit v1.2.1 From 04b7b2ff50fc77380c1e711f1d7223734547e41b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2012 13:41:37 +0200 Subject: mac80211: handle power constraint/country IE better Currently, mac80211 uses the power constraint IE, and reduces the regulatory max TX power by it. This can cause issues if the AP is advertising a large power constraint value matching a high TX power in its country IE, for example in this case: ... Country: US Environment: Indoor/Outdoor ... Channels [157 - 157] @ 30 dBm ... Power constraint: 13 dB ... What happened here is that our local regulatory TX power is 15 dBm, and gets reduced by 13 dB so we end up with only 2 dBm effective TX power, which is way too low. Instead, handle the country IE/power constraint IE combined and restrict our TX power to the max of the regulatory power and the maximum power advertised by the AP, in this case 17 dBm (= 30 dBm - 13 dB). Also print a message when this happens to let the user know and help us debug issues with it. Reported-by: Carl A. Cook Tested-by: Carl A. Cook Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/main.c | 8 ++--- net/mac80211/mlme.c | 84 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 72 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d11a6f837d69..8c804550465b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1059,7 +1059,7 @@ struct ieee80211_local { bool disable_dynamic_ps; int user_power_level; /* in dBm */ - int power_constr_level; /* in dBm */ + int ap_power_level; /* in dBm */ enum ieee80211_smps_mode smps_mode; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index bd7529363193..416e85eae2d2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -150,13 +150,11 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) if (test_bit(SCAN_SW_SCANNING, &local->scanning) || test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) || - test_bit(SCAN_HW_SCANNING, &local->scanning)) + test_bit(SCAN_HW_SCANNING, &local->scanning) || + !local->ap_power_level) power = chan->max_power; else - power = local->power_constr_level ? - min(chan->max_power, - (chan->max_reg_power - local->power_constr_level)) : - chan->max_power; + power = min(chan->max_power, local->ap_power_level); if (local->user_power_level >= 0) power = min(power, local->user_power_level); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ca34137a3b5..45a9fa63b368 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -779,18 +779,71 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, - u16 capab_info, u8 *pwr_constr_elem) + struct ieee80211_channel *channel, + const u8 *country_ie, u8 country_ie_len, + const u8 *pwr_constr_elem) { - struct ieee80211_conf *conf = &sdata->local->hw.conf; + struct ieee80211_country_ie_triplet *triplet; + int chan = ieee80211_frequency_to_channel(channel->center_freq); + int i, chan_pwr, chan_increment, new_ap_level; + bool have_chan_pwr = false; - if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT)) + /* Invalid IE */ + if (country_ie_len % 2 || country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) return; - if ((*pwr_constr_elem <= conf->channel->max_reg_power) && - (*pwr_constr_elem != sdata->local->power_constr_level)) { - sdata->local->power_constr_level = *pwr_constr_elem; - ieee80211_hw_config(sdata->local, 0); + triplet = (void *)(country_ie + 3); + country_ie_len -= 3; + + switch (channel->band) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case IEEE80211_BAND_2GHZ: + case IEEE80211_BAND_60GHZ: + chan_increment = 1; + break; + case IEEE80211_BAND_5GHZ: + chan_increment = 4; + break; } + + /* find channel */ + while (country_ie_len >= 3) { + u8 first_channel = triplet->chans.first_channel; + + if (first_channel >= IEEE80211_COUNTRY_EXTENSION_ID) + goto next; + + for (i = 0; i < triplet->chans.num_channels; i++) { + if (first_channel + i * chan_increment == chan) { + have_chan_pwr = true; + chan_pwr = triplet->chans.max_power; + break; + } + } + if (have_chan_pwr) + break; + + next: + triplet++; + country_ie_len -= 3; + } + + if (!have_chan_pwr) + return; + + new_ap_level = max_t(int, 0, chan_pwr - *pwr_constr_elem); + + if (sdata->local->ap_power_level == new_ap_level) + return; + + sdata_info(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + new_ap_level, chan_pwr, *pwr_constr_elem, + sdata->u.mgd.bssid); + sdata->local->ap_power_level = new_ap_level; + ieee80211_hw_config(sdata->local, 0); } void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) @@ -1394,7 +1447,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); - local->power_constr_level = 0; + local->ap_power_level = 0; del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -2499,14 +2552,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, bssid, true); } - /* Note: country IE parsing is done for us by cfg80211 */ - if (elems.country_elem) { - /* TODO: IBSS also needs this */ - if (elems.pwr_constr_elem) - ieee80211_handle_pwr_constr(sdata, - le16_to_cpu(mgmt->u.probe_resp.capab_info), - elems.pwr_constr_elem); - } + if (elems.country_elem && elems.pwr_constr_elem && + mgmt->u.probe_resp.capab_info & + cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) + ieee80211_handle_pwr_constr(sdata, local->oper_channel, + elems.country_elem, + elems.country_elem_len, + elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); } -- cgit v1.2.1 From 5d8e4237d2dc73b51ac66dc612c5c42dd7424479 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Sep 2012 10:17:11 +0200 Subject: mac80211: change locking around ieee80211_recalc_smps Make the function acquire the necessary mutex itself to simplify the callers. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 -- net/mac80211/main.c | 2 -- net/mac80211/mlme.c | 2 +- net/mac80211/util.c | 7 ++++--- 4 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 00e31b488adc..9bd56a744982 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2070,9 +2070,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, */ if (!sdata->u.mgd.associated || sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) { - mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_smps(sdata->local); - mutex_unlock(&sdata->local->iflist_mtx); return 0; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 416e85eae2d2..c80c4490351c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -364,9 +364,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, recalc_smps); - mutex_lock(&local->iflist_mtx); ieee80211_recalc_smps(local); - mutex_unlock(&local->iflist_mtx); } #ifdef CONFIG_INET diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45a9fa63b368..2dbd9e1e3583 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1348,9 +1348,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); - ieee80211_recalc_smps(local); mutex_unlock(&local->iflist_mtx); + ieee80211_recalc_smps(local); ieee80211_recalc_ps_vif(sdata); netif_tx_start_all_queues(sdata->dev); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2017904c69cc..22ca35054dd0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1606,14 +1606,13 @@ static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, return 0; } -/* must hold iflist_mtx */ void ieee80211_recalc_smps(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; int count = 0; - lockdep_assert_held(&local->iflist_mtx); + mutex_lock(&local->iflist_mtx); /* * This function could be improved to handle multiple @@ -1642,12 +1641,14 @@ void ieee80211_recalc_smps(struct ieee80211_local *local) } if (smps_mode == local->smps_mode) - return; + goto unlock; set: local->smps_mode = smps_mode; /* changed flag is auto-detected for this */ ieee80211_hw_config(local, 0); + unlock: + mutex_unlock(&local->iflist_mtx); } static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) -- cgit v1.2.1 From 9385d04f2872057a2029901190391fe192b18693 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 14 Sep 2012 14:18:31 +0800 Subject: mac80211: allow re-open the blocked peer link in mesh Peer link which is blocked using the "iw mesh0 station set plink_action block" is previously not able to re-open using "iw mesh0 station set plink_action open". This patch is intended to solve this. If the station plink state remains at OPN_SNT once open, try block and open again should solve this problem. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 9d7ad366ef09..3ab34d816897 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -537,7 +537,8 @@ int mesh_plink_open(struct sta_info *sta) spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; - if (sta->plink_state != NL80211_PLINK_LISTEN) { + if (sta->plink_state != NL80211_PLINK_LISTEN && + sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); return -EBUSY; } -- cgit v1.2.1 From 8fb974c937570be38f944986456467b39a2dc252 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:02 +0200 Subject: cgroup: net_cls: Do not define task_cls_classid() when not selected task_cls_classid() should not be defined in case the configuration is CONFIG_NET_CLS_CGROUP=n. The reason is that in a following patch the net_cls_subsys_id will only be defined if CONFIG_NET_CLS_CGROUP!=n. When net_cls is not built at all a callee should only get an empty task_cls_classid() without any references to net_cls_subsys_id. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced8d6a8..82cadc62a872 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1223,6 +1223,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) } #ifdef CONFIG_CGROUPS +#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) void sock_update_classid(struct sock *sk) { u32 classid; @@ -1234,6 +1235,7 @@ void sock_update_classid(struct sock *sk) sk->sk_classid = classid; } EXPORT_SYMBOL(sock_update_classid); +#endif void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { -- cgit v1.2.1 From 51e4e7faba786d33e5e33f8776c5027a1c8d6fb7 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:03 +0200 Subject: cgroup: net_prio: Do not define task_netpioidx() when not selected task_netprioidx() should not be defined in case the configuration is CONFIG_NETPRIO_CGROUP=n. The reason is that in a following patch the net_prio_subsys_id will only be defined if CONFIG_NETPRIO_CGROUP!=n. When net_prio is not built at all any callee should only get an empty task_netprioidx() without any references to net_prio_subsys_id. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 82cadc62a872..ca3eaee66056 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1237,6 +1237,7 @@ void sock_update_classid(struct sock *sk) EXPORT_SYMBOL(sock_update_classid); #endif +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { if (in_interrupt()) @@ -1246,6 +1247,7 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task) } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif +#endif /** * sk_alloc - All socket objects are allocated here -- cgit v1.2.1 From 8a8e04df4747661daaee77e98e102d99c9e09b98 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:07 +0200 Subject: cgroup: Assign subsystem IDs during compile time WARNING: With this change it is impossible to load external built controllers anymore. In case where CONFIG_NETPRIO_CGROUP=m and CONFIG_NET_CLS_CGROUP=m is set, corresponding subsys_id should also be a constant. Up to now, net_prio_subsys_id and net_cls_subsys_id would be of the type int and the value would be assigned during runtime. By switching the macro definition IS_SUBSYS_ENABLED from IS_BUILTIN to IS_ENABLED, all *_subsys_id will have constant value. That means we need to remove all the code which assumes a value can be assigned to net_prio_subsys_id and net_cls_subsys_id. A close look is necessary on the RCU part which was introduces by following patch: commit f845172531fb7410c7fb7780b1a6e51ee6df7d52 Author: Herbert Xu Mon May 24 09:12:34 2010 Committer: David S. Miller Mon May 24 09:12:34 2010 cls_cgroup: Store classid in struct sock Tis code was added to init_cgroup_cls() /* We can't use rcu_assign_pointer because this is an int. */ smp_wmb(); net_cls_subsys_id = net_cls_subsys.subsys_id; respectively to exit_cgroup_cls() net_cls_subsys_id = -1; synchronize_rcu(); and in module version of task_cls_classid() rcu_read_lock(); id = rcu_dereference(net_cls_subsys_id); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); Without an explicit explaination why the RCU part is needed. (The rcu_deference was fixed by exchanging it to rcu_derefence_index_check() in a later commit, but that is a minor detail.) So here is my pondering why it was introduced and why it safe to remove it now. Note that this code was copied over to net_prio the reasoning holds for that subsystem too. The idea behind the RCU use for net_cls_subsys_id is to make sure we get a valid pointer back from task_subsys_state(). task_subsys_state() is just blindly accessing the subsys array and returning the pointer. Obviously, passing in -1 as id into task_subsys_state() returns an invalid value (out of lower bound). So this code makes sure that only after module is loaded and the subsystem registered, the id is assigned. Before unregistering the module all old readers must have left the critical section. This is done by assigning -1 to the id and issuing a synchronized_rcu(). Any new readers wont call task_subsys_state() anymore and therefore it is safe to unregister the subsystem. The new code relies on the same trick, but it looks at the subsys pointer return by task_subsys_state() (remember the id is constant and therefore we allways have a valid index into the subsys array). No precautions need to be taken during module loading module. Eventually, all CPUs will get a valid pointer back from task_subsys_state() because rebind_subsystem() which is called after the module init() function will assigned subsys[net_cls_subsys_id] the newly loaded module subsystem pointer. When the subsystem is about to be removed, rebind_subsystem() will called before the module exit() function. In this case, rebind_subsys() will assign subsys[net_cls_subsys_id] a NULL pointer and then it calls synchronize_rcu(). All old readers have left by then the critical section. Any new reader wont access the subsystem anymore. At this point we are safe to unregister the subsystem. No synchronize_rcu() call is needed. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: "David S. Miller" Cc: "Paul E. McKenney" Cc: Andrew Morton Cc: Eric Dumazet Cc: Gao feng Cc: Glauber Costa Cc: Herbert Xu Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kamezawa Hiroyuki Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- net/core/netprio_cgroup.c | 11 ----------- net/core/sock.c | 11 ----------- net/sched/cls_cgroup.c | 13 ------------- 3 files changed, 35 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..6bc460c38e4f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -326,9 +326,7 @@ struct cgroup_subsys net_prio_subsys = { .create = cgrp_create, .destroy = cgrp_destroy, .attach = net_prio_attach, -#ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, -#endif .base_cftypes = ss_files, .module = THIS_MODULE }; @@ -366,10 +364,6 @@ static int __init init_cgroup_netprio(void) ret = cgroup_load_subsys(&net_prio_subsys); if (ret) goto out; -#ifndef CONFIG_NETPRIO_CGROUP - smp_wmb(); - net_prio_subsys_id = net_prio_subsys.subsys_id; -#endif register_netdevice_notifier(&netprio_device_notifier); @@ -386,11 +380,6 @@ static void __exit exit_cgroup_netprio(void) cgroup_unload_subsys(&net_prio_subsys); -#ifndef CONFIG_NETPRIO_CGROUP - net_prio_subsys_id = -1; - synchronize_rcu(); -#endif - rtnl_lock(); for_each_netdev(&init_net, dev) { old = rtnl_dereference(dev->priomap); diff --git a/net/core/sock.c b/net/core/sock.c index ca3eaee66056..47b4ac048e88 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -326,17 +326,6 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sk_backlog_rcv); -#if defined(CONFIG_CGROUPS) -#if !defined(CONFIG_NET_CLS_CGROUP) -int net_cls_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_cls_subsys_id); -#endif -#if !defined(CONFIG_NETPRIO_CGROUP) -int net_prio_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_prio_subsys_id); -#endif -#endif - static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { struct timeval tv; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..67cf90d962f4 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -77,9 +77,7 @@ struct cgroup_subsys net_cls_subsys = { .name = "net_cls", .create = cgrp_create, .destroy = cgrp_destroy, -#ifdef CONFIG_NET_CLS_CGROUP .subsys_id = net_cls_subsys_id, -#endif .base_cftypes = ss_files, .module = THIS_MODULE, }; @@ -283,12 +281,6 @@ static int __init init_cgroup_cls(void) if (ret) goto out; -#ifndef CONFIG_NET_CLS_CGROUP - /* We can't use rcu_assign_pointer because this is an int. */ - smp_wmb(); - net_cls_subsys_id = net_cls_subsys.subsys_id; -#endif - ret = register_tcf_proto_ops(&cls_cgroup_ops); if (ret) cgroup_unload_subsys(&net_cls_subsys); @@ -301,11 +293,6 @@ static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); -#ifndef CONFIG_NET_CLS_CGROUP - net_cls_subsys_id = -1; - synchronize_rcu(); -#endif - cgroup_unload_subsys(&net_cls_subsys); } -- cgit v1.2.1 From 8c7f6edbda01f1b1a2e60ad61f14fe38023e433b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Sep 2012 12:20:58 -0700 Subject: cgroup: mark subsystems with broken hierarchy support and whine if cgroups are nested for them Currently, cgroup hierarchy support is a mess. cpu related subsystems behave correctly - configuration, accounting and control on a parent properly cover its children. blkio and freezer completely ignore hierarchy and treat all cgroups as if they're directly under the root cgroup. Others show yet different behaviors. These differing interpretations of cgroup hierarchy make using cgroup confusing and it impossible to co-mount controllers into the same hierarchy and obtain sane behavior. Eventually, we want full hierarchy support from all subsystems and probably a unified hierarchy. Users using separate hierarchies expecting completely different behaviors depending on the mounted subsystem is deterimental to making any progress on this front. This patch adds cgroup_subsys.broken_hierarchy and sets it to %true for controllers which are lacking in hierarchy support. The goal of this patch is two-fold. * Move users away from using hierarchy on currently non-hierarchical subsystems, so that implementing proper hierarchy support on those doesn't surprise them. * Keep track of which controllers are broken how and nudge the subsystems to implement proper hierarchy support. For now, start with a single warning message. We can whine louder later on. v2: Fixed a typo spotted by Michal. Warning message updated. v3: Updated memcg part so that it doesn't generate warning in the cases where .use_hierarchy=false doesn't make the behavior different from root.use_hierarchy=true. Fixed a typo spotted by Glauber. v4: Check ->broken_hierarchy after cgroup creation is complete so that ->create() can affect the result per Michal. Dropped unnecessary memcg root handling per Michal. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Li Zefan Acked-by: Serge E. Hallyn Cc: Glauber Costa Cc: Peter Zijlstra Cc: Paul Turner Cc: Johannes Weiner Cc: Thomas Graf Cc: Vivek Goyal Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Neil Horman Cc: Aneesh Kumar K.V --- net/core/netprio_cgroup.c | 12 +++++++++++- net/sched/cls_cgroup.c | 9 +++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..34f3615b30ca 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -330,7 +330,17 @@ struct cgroup_subsys net_prio_subsys = { .subsys_id = net_prio_subsys_id, #endif .base_cftypes = ss_files, - .module = THIS_MODULE + .module = THIS_MODULE, + + /* + * net_prio has artificial limit on the number of cgroups and + * disallows nesting making it impossible to co-mount it with other + * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ + * limit and properly nest configuration such that children follow + * their parents' configurations by default and are allowed to + * override and remove the following. + */ + .broken_hierarchy = true, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..907daf99ab2e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -82,6 +82,15 @@ struct cgroup_subsys net_cls_subsys = { #endif .base_cftypes = ss_files, .module = THIS_MODULE, + + /* + * While net_cls cgroup has the rudimentary hierarchy support of + * inheriting the parent's classid on cgroup creation, it doesn't + * properly propagates config changes in ancestors to their + * descendents. A child should follow the parent's configuration + * but be allowed to override it. Fix it and remove the following. + */ + .broken_hierarchy = true, }; struct cls_cgroup_head { -- cgit v1.2.1 From b004ff4972e2a42aa4512c90cc6a9e4dc1bb36b6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 20:12:19 -0700 Subject: netdev_printk/dynamic_netdev_dbg: Directly call printk_emit A lot of stack is used in recursive printks with %pV. Using multiple levels of %pV (a logging function with %pV that calls another logging function with %pV) can consume more stack than necessary. Avoid excessive stack use by not calling dev_printk from netdev_printk and dynamic_netdev_dbg. Duplicate the logic and form of dev_printk instead. Make __netdev_printk static. Remove EXPORT_SYMBOL(__netdev_printk) Whitespace and brace style neatening. Signed-off-by: Joe Perches Acked-by: David S. Miller Tested-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d7fe32c946c1..ac890f14613a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6423,22 +6423,30 @@ const char *netdev_drivername(const struct net_device *dev) return empty; } -int __netdev_printk(const char *level, const struct net_device *dev, +static int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { int r; - if (dev && dev->dev.parent) - r = dev_printk(level, dev->dev.parent, "%s: %pV", - netdev_name(dev), vaf); - else if (dev) + if (dev && dev->dev.parent) { + char dict[128]; + size_t dictlen = create_syslog_header(dev->dev.parent, + dict, sizeof(dict)); + + r = printk_emit(0, level[1] - '0', + dictlen ? dict : NULL, dictlen, + "%s %s: %s: %pV", + dev_driver_string(dev->dev.parent), + dev_name(dev->dev.parent), + netdev_name(dev), vaf); + } else if (dev) { r = printk("%s%s: %pV", level, netdev_name(dev), vaf); - else + } else { r = printk("%s(NULL net_device): %pV", level, vaf); + } return r; } -EXPORT_SYMBOL(__netdev_printk); int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) @@ -6453,6 +6461,7 @@ int netdev_printk(const char *level, const struct net_device *dev, vaf.va = &args; r = __netdev_printk(level, dev, &vaf); + va_end(args); return r; @@ -6472,6 +6481,7 @@ int func(const struct net_device *dev, const char *fmt, ...) \ vaf.va = &args; \ \ r = __netdev_printk(level, dev, &vaf); \ + \ va_end(args); \ \ return r; \ -- cgit v1.2.1 From c2c5a7051c556036b7beb8f4a89eefdc91c3245b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 20:13:01 -0700 Subject: netdev_printk/netif_printk: Remove a superfluous logging colon netdev_printk originally called dev_printk with %pV. This style emitted the complete dev_printk header with a colon followed by the netdev_name prefix followed by a colon. Now that netdev_printk does not call dev_printk, the extra colon is superfluous. Remove it. Example: old: sky2 0000:02:00.0: eth0: Link is up at 100 Mbps, full duplex, flow control both new: sky2 0000:02:00.0 eth0: Link is up at 100 Mbps, full duplex, flow control both Signed-off-by: Joe Perches Acked-by: David S. Miller Tested-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ac890f14613a..cb9d43be07e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6435,7 +6435,7 @@ static int __netdev_printk(const char *level, const struct net_device *dev, r = printk_emit(0, level[1] - '0', dictlen ? dict : NULL, dictlen, - "%s %s: %s: %pV", + "%s %s %s: %pV", dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), netdev_name(dev), vaf); -- cgit v1.2.1 From 666f355f3805d68b6ed5f7013806f1f65abfbf03 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 20:14:11 -0700 Subject: device and dynamic_debug: Use dev_vprintk_emit and dev_printk_emit Convert direct calls of vprintk_emit and printk_emit to the dev_ equivalents. Make create_syslog_header static. Signed-off-by: Joe Perches Acked-by: David S. Miller Tested-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cb9d43be07e7..76c0fe66fdc0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6429,16 +6429,12 @@ static int __netdev_printk(const char *level, const struct net_device *dev, int r; if (dev && dev->dev.parent) { - char dict[128]; - size_t dictlen = create_syslog_header(dev->dev.parent, - dict, sizeof(dict)); - - r = printk_emit(0, level[1] - '0', - dictlen ? dict : NULL, dictlen, - "%s %s %s: %pV", - dev_driver_string(dev->dev.parent), - dev_name(dev->dev.parent), - netdev_name(dev), vaf); + r = dev_printk_emit(level[1] - '0', + dev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(dev->dev.parent), + dev_name(dev->dev.parent), + netdev_name(dev), vaf); } else if (dev) { r = printk("%s%s: %pV", level, netdev_name(dev), vaf); } else { -- cgit v1.2.1 From e04dae840883ca31cea01e5958c09eee005cf4de Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 17 Sep 2012 00:52:41 +0000 Subject: af_unix: old_cred is surplus Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/unix/af_unix.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8a84ab64cafd..5b5c876c80e9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -441,7 +441,7 @@ static int unix_release_sock(struct sock *sk, int embrion) /* ---- Socket is dead now and most probably destroyed ---- */ /* - * Fixme: BSD difference: In BSD all sockets connected to use get + * Fixme: BSD difference: In BSD all sockets connected to us get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. @@ -481,7 +481,6 @@ static int unix_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); struct pid *old_pid = NULL; - const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -503,8 +502,6 @@ static int unix_listen(struct socket *sock, int backlog) out_unlock: unix_state_unlock(sk); put_pid(old_pid); - if (old_cred) - put_cred(old_cred); out: return err; } -- cgit v1.2.1 From cc6328dfe48fbc6ba67a4bb31473aeea429bee26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:10:52 +0000 Subject: llc2: Remove pointless indirection through llc_stat_state_trans_end Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index b2f2bac2c2a2..e16c1b96e527 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -331,14 +331,6 @@ static int llc_station_ac_report_status(struct sk_buff *skb) return 0; } -/* COMMON STATION STATE transitions */ - -/* dummy last-transition indicator; common to all state transition groups - * last entry for this state - * all members are zeros, .bss zeroes it - */ -static struct llc_station_state_trans llc_stat_state_trans_end; - /* DOWN STATE transitions */ /* state transition for LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK event */ @@ -372,7 +364,7 @@ static struct llc_station_state_trans llc_stat_down_state_trans_2 = { static struct llc_station_state_trans *llc_stat_dwn_state_trans[] = { [0] = &llc_stat_down_state_trans_1, [1] = &llc_stat_down_state_trans_2, - [2] = &llc_stat_state_trans_end, + [2] = NULL, }; /* UP STATE transitions */ @@ -417,7 +409,7 @@ static struct llc_station_state_trans *llc_stat_up_state_trans [] = { [0] = &llc_stat_up_state_trans_1, [1] = &llc_stat_up_state_trans_2, [2] = &llc_stat_up_state_trans_3, - [3] = &llc_stat_state_trans_end, + [3] = NULL, }; /* DUP ADDR CHK STATE transitions */ @@ -512,7 +504,7 @@ static struct llc_station_state_trans *llc_stat_dupaddr_state_trans[] = { [3] = &llc_stat_dupaddr_state_trans_1, /* Receive frame */ [4] = &llc_stat_dupaddr_state_trans_2, [5] = &llc_stat_dupaddr_state_trans_3, - [6] = &llc_stat_state_trans_end, + [6] = NULL, }; static struct llc_station_state @@ -568,7 +560,7 @@ static struct llc_station_state_trans * struct llc_station_state *curr_state = &llc_station_state_table[llc_main_station.state - 1]; - for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) + for (next_trans = curr_state->transitions; next_trans[i]; i++) if (!next_trans[i]->ev(skb)) { rc = next_trans[i]; break; -- cgit v1.2.1 From 025e36332573177ecd7c12730e18a4390f994f05 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:11:18 +0000 Subject: llc2: Remove dead code for state machine The initial state is UP and there is no way to enter the other states as the required event type is never generated. Delete all states, event types, and other dead code. The only thing left is handling of the XID and TEST commands. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 404 ++------------------------------------------------ 1 file changed, 9 insertions(+), 395 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index e16c1b96e527..917d700791ba 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -30,19 +30,12 @@ * * SAP and connection resource manager, one per adapter. * - * @state: state of station - * @xid_r_count: XID response PDU counter * @mac_sa: MAC source address * @sap_list: list of related SAPs * @ev_q: events entering state mach. * @mac_pdu_q: PDUs ready to send to MAC */ struct llc_station { - u8 state; - u8 xid_r_count; - struct timer_list ack_timer; - u8 retry_count; - u8 maximum_retry; struct { struct sk_buff_head list; spinlock_t lock; @@ -50,162 +43,38 @@ struct llc_station { struct sk_buff_head mac_pdu_q; }; -#define LLC_STATION_ACK_TIME (3 * HZ) - -int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; - -/* Types of events (possible values in 'ev->type') */ -#define LLC_STATION_EV_TYPE_SIMPLE 1 -#define LLC_STATION_EV_TYPE_CONDITION 2 -#define LLC_STATION_EV_TYPE_PRIM 3 -#define LLC_STATION_EV_TYPE_PDU 4 /* command/response PDU */ -#define LLC_STATION_EV_TYPE_ACK_TMR 5 -#define LLC_STATION_EV_TYPE_RPT_STATUS 6 - -/* Events */ -#define LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK 1 -#define LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK 2 -#define LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY 3 -#define LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY 4 -#define LLC_STATION_EV_RX_NULL_DSAP_XID_C 5 -#define LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ 6 -#define LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ 7 -#define LLC_STATION_EV_RX_NULL_DSAP_TEST_C 8 -#define LLC_STATION_EV_DISABLE_REQ 9 - -struct llc_station_state_ev { - u8 type; - u8 prim; - u8 prim_type; - u8 reason; - struct list_head node; /* node in station->ev_q.list */ -}; - -static __inline__ struct llc_station_state_ev * - llc_station_ev(struct sk_buff *skb) -{ - return (struct llc_station_state_ev *)skb->cb; -} - typedef int (*llc_station_ev_t)(struct sk_buff *skb); -#define LLC_STATION_STATE_DOWN 1 /* initial state */ -#define LLC_STATION_STATE_DUP_ADDR_CHK 2 -#define LLC_STATION_STATE_UP 3 - -#define LLC_NBR_STATION_STATES 3 /* size of state table */ - typedef int (*llc_station_action_t)(struct sk_buff *skb); /* Station component state table structure */ struct llc_station_state_trans { llc_station_ev_t ev; - u8 next_state; llc_station_action_t *ev_actions; }; -struct llc_station_state { - u8 curr_state; - struct llc_station_state_trans **transitions; -}; - static struct llc_station llc_main_station; -static int llc_stat_ev_enable_with_dup_addr_check(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_SIMPLE && - ev->prim_type == - LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK ? 0 : 1; -} - -static int llc_stat_ev_enable_without_dup_addr_check(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_SIMPLE && - ev->prim_type == - LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK ? 0 : 1; -} - -static int llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_ACK_TMR && - llc_main_station.retry_count < - llc_main_station.maximum_retry ? 0 : 1; -} - -static int llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_ACK_TMR && - llc_main_station.retry_count == - llc_main_station.maximum_retry ? 0 : 1; -} - static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_CMD(pdu) && /* command PDU */ + return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && !pdu->dsap ? 0 : 1; /* NULL DSAP value */ } -static int llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_RSP(pdu) && /* response PDU */ - LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ - LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap && /* NULL DSAP value */ - !llc_main_station.xid_r_count ? 0 : 1; -} - -static int llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_RSP(pdu) && /* response PDU */ - LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ - LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap && /* NULL DSAP value */ - llc_main_station.xid_r_count == 1 ? 0 : 1; -} - static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_CMD(pdu) && /* command PDU */ + return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && !pdu->dsap ? 0 : 1; /* NULL DSAP */ } -static int llc_stat_ev_disable_req(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_PRIM && - ev->prim == LLC_DISABLE_PRIM && - ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; -} - /** * llc_station_send_pdu - queues PDU to send * @skb: Address of the PDU @@ -220,58 +89,6 @@ static void llc_station_send_pdu(struct sk_buff *skb) break; } -static int llc_station_ac_start_ack_timer(struct sk_buff *skb) -{ - mod_timer(&llc_main_station.ack_timer, - jiffies + sysctl_llc_station_ack_timeout); - return 0; -} - -static int llc_station_ac_set_retry_cnt_0(struct sk_buff *skb) -{ - llc_main_station.retry_count = 0; - return 0; -} - -static int llc_station_ac_inc_retry_cnt_by_1(struct sk_buff *skb) -{ - llc_main_station.retry_count++; - return 0; -} - -static int llc_station_ac_set_xid_r_cnt_0(struct sk_buff *skb) -{ - llc_main_station.xid_r_count = 0; - return 0; -} - -static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) -{ - llc_main_station.xid_r_count++; - return 0; -} - -static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) -{ - int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, - sizeof(struct llc_xid_info)); - - if (!nskb) - goto out; - llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); - llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, skb->dev->dev_addr); - if (unlikely(rc)) - goto free; - llc_station_send_pdu(nskb); -out: - return rc; -free: - kfree_skb(nskb); - goto out; -} - static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; @@ -326,60 +143,6 @@ free: goto out; } -static int llc_station_ac_report_status(struct sk_buff *skb) -{ - return 0; -} - -/* DOWN STATE transitions */ - -/* state transition for LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK event */ -static llc_station_action_t llc_stat_down_state_actions_1[] = { - [0] = llc_station_ac_start_ack_timer, - [1] = llc_station_ac_set_retry_cnt_0, - [2] = llc_station_ac_set_xid_r_cnt_0, - [3] = llc_station_ac_send_null_dsap_xid_c, - [4] = NULL, -}; - -static struct llc_station_state_trans llc_stat_down_state_trans_1 = { - .ev = llc_stat_ev_enable_with_dup_addr_check, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_down_state_actions_1, -}; - -/* state transition for LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK event */ -static llc_station_action_t llc_stat_down_state_actions_2[] = { - [0] = llc_station_ac_report_status, /* STATION UP */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_down_state_trans_2 = { - .ev = llc_stat_ev_enable_without_dup_addr_check, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_down_state_actions_2, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_dwn_state_trans[] = { - [0] = &llc_stat_down_state_trans_1, - [1] = &llc_stat_down_state_trans_2, - [2] = NULL, -}; - -/* UP STATE transitions */ -/* state transition for LLC_STATION_EV_DISABLE_REQ event */ -static llc_station_action_t llc_stat_up_state_actions_1[] = { - [0] = llc_station_ac_report_status, /* STATION DOWN */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_1 = { - .ev = llc_stat_ev_disable_req, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_up_state_actions_1, -}; - /* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ static llc_station_action_t llc_stat_up_state_actions_2[] = { [0] = llc_station_ac_send_xid_r, @@ -388,7 +151,6 @@ static llc_station_action_t llc_stat_up_state_actions_2[] = { static struct llc_station_state_trans llc_stat_up_state_trans_2 = { .ev = llc_stat_ev_rx_null_dsap_xid_c, - .next_state = LLC_STATION_STATE_UP, .ev_actions = llc_stat_up_state_actions_2, }; @@ -400,127 +162,14 @@ static llc_station_action_t llc_stat_up_state_actions_3[] = { static struct llc_station_state_trans llc_stat_up_state_trans_3 = { .ev = llc_stat_ev_rx_null_dsap_test_c, - .next_state = LLC_STATION_STATE_UP, .ev_actions = llc_stat_up_state_actions_3, }; /* array of pointers; one to each transition */ static struct llc_station_state_trans *llc_stat_up_state_trans [] = { - [0] = &llc_stat_up_state_trans_1, - [1] = &llc_stat_up_state_trans_2, - [2] = &llc_stat_up_state_trans_3, - [3] = NULL, -}; - -/* DUP ADDR CHK STATE transitions */ -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_1[] = { - [0] = llc_station_ac_inc_xid_r_cnt_by_1, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_1 = { - .ev = llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_1, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_2[] = { - [0] = llc_station_ac_report_status, /* DUPLICATE ADDRESS FOUND */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_2 = { - .ev = llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_dupaddr_state_actions_2, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ -static llc_station_action_t llc_stat_dupaddr_state_actions_3[] = { - [0] = llc_station_ac_send_xid_r, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_3 = { - .ev = llc_stat_ev_rx_null_dsap_xid_c, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_3, -}; - -/* state transition for LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_4[] = { - [0] = llc_station_ac_start_ack_timer, - [1] = llc_station_ac_inc_retry_cnt_by_1, - [2] = llc_station_ac_set_xid_r_cnt_0, - [3] = llc_station_ac_send_null_dsap_xid_c, - [4] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_4 = { - .ev = llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_4, -}; - -/* state transition for LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_5[] = { - [0] = llc_station_ac_report_status, /* STATION UP */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_5 = { - .ev = llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_dupaddr_state_actions_5, -}; - -/* state transition for LLC_STATION_EV_DISABLE_REQ event */ -static llc_station_action_t llc_stat_dupaddr_state_actions_6[] = { - [0] = llc_station_ac_report_status, /* STATION DOWN */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_6 = { - .ev = llc_stat_ev_disable_req, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_dupaddr_state_actions_6, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_dupaddr_state_trans[] = { - [0] = &llc_stat_dupaddr_state_trans_6, /* Request */ - [1] = &llc_stat_dupaddr_state_trans_4, /* Timer */ - [2] = &llc_stat_dupaddr_state_trans_5, - [3] = &llc_stat_dupaddr_state_trans_1, /* Receive frame */ - [4] = &llc_stat_dupaddr_state_trans_2, - [5] = &llc_stat_dupaddr_state_trans_3, - [6] = NULL, -}; - -static struct llc_station_state - llc_station_state_table[LLC_NBR_STATION_STATES] = { - [LLC_STATION_STATE_DOWN - 1] = { - .curr_state = LLC_STATION_STATE_DOWN, - .transitions = llc_stat_dwn_state_trans, - }, - [LLC_STATION_STATE_DUP_ADDR_CHK - 1] = { - .curr_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .transitions = llc_stat_dupaddr_state_trans, - }, - [LLC_STATION_STATE_UP - 1] = { - .curr_state = LLC_STATION_STATE_UP, - .transitions = llc_stat_up_state_trans, - }, + &llc_stat_up_state_trans_2, + &llc_stat_up_state_trans_3, + NULL, }; /** @@ -557,10 +206,8 @@ static struct llc_station_state_trans * int i = 0; struct llc_station_state_trans *rc = NULL; struct llc_station_state_trans **next_trans; - struct llc_station_state *curr_state = - &llc_station_state_table[llc_main_station.state - 1]; - for (next_trans = curr_state->transitions; next_trans[i]; i++) + for (next_trans = llc_stat_up_state_trans; next_trans[i]; i++) if (!next_trans[i]->ev(skb)) { rc = next_trans[i]; break; @@ -576,10 +223,7 @@ static struct llc_station_state_trans * */ static void llc_station_free_ev(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); - - if (ev->type == LLC_STATION_EV_TYPE_PDU) - kfree_skb(skb); + kfree_skb(skb); } /** @@ -594,26 +238,18 @@ static u16 llc_station_next_state(struct sk_buff *skb) u16 rc = 1; struct llc_station_state_trans *trans; - if (llc_main_station.state > LLC_NBR_STATION_STATES) - goto out; trans = llc_find_station_trans(skb); - if (trans) { + if (trans) /* got the state to which we next transition; perform the * actions associated with this transition before actually * transitioning to the next state */ rc = llc_exec_station_trans_actions(trans, skb); - if (!rc) - /* transition station to next state if all actions - * execute successfully; done; wait for next event - */ - llc_main_station.state = trans->next_state; - } else + else /* event not recognized in current state; re-queue it for * processing again at a later time; return failure */ rc = 0; -out: llc_station_free_ev(skb); return rc; } @@ -652,18 +288,6 @@ static void llc_station_state_process(struct sk_buff *skb) spin_unlock_bh(&llc_main_station.ev_q.lock); } -static void llc_station_ack_tmr_cb(unsigned long timeout_data) -{ - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - if (skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); - - ev->type = LLC_STATION_EV_TYPE_ACK_TMR; - llc_station_state_process(skb); - } -} - /** * llc_station_rcv - send received pdu to the station state machine * @skb: received frame. @@ -672,10 +296,6 @@ static void llc_station_ack_tmr_cb(unsigned long timeout_data) */ static void llc_station_rcv(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); - - ev->type = LLC_STATION_EV_TYPE_PDU; - ev->reason = 0; llc_station_state_process(skb); } @@ -684,12 +304,6 @@ void __init llc_station_init(void) skb_queue_head_init(&llc_main_station.mac_pdu_q); skb_queue_head_init(&llc_main_station.ev_q.list); spin_lock_init(&llc_main_station.ev_q.lock); - setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb, - (unsigned long)&llc_main_station); - llc_main_station.ack_timer.expires = jiffies + - sysctl_llc_station_ack_timeout; - llc_main_station.maximum_retry = 1; - llc_main_station.state = LLC_STATION_STATE_UP; llc_set_station_handler(llc_station_rcv); } -- cgit v1.2.1 From 04d191c259e2a2832ea7aef14cb02fe03a71d51f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:11:25 +0000 Subject: llc2: Collapse the station event receive path We only ever put one skb on the event queue, and then immediately process it. Remove the queue and fold together the related functions, removing several blatantly false comments. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 87 ++++----------------------------------------------- 1 file changed, 6 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 917d700791ba..3bdb888f8501 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -32,14 +32,9 @@ * * @mac_sa: MAC source address * @sap_list: list of related SAPs - * @ev_q: events entering state mach. * @mac_pdu_q: PDUs ready to send to MAC */ struct llc_station { - struct { - struct sk_buff_head list; - spinlock_t lock; - } ev_q; struct sk_buff_head mac_pdu_q; }; @@ -215,79 +210,6 @@ static struct llc_station_state_trans * return rc; } -/** - * llc_station_free_ev - frees an event - * @skb: Address of the event - * - * Frees an event. - */ -static void llc_station_free_ev(struct sk_buff *skb) -{ - kfree_skb(skb); -} - -/** - * llc_station_next_state - processes event and goes to the next state - * @skb: Address of the event - * - * Processes an event, executes any transitions related to that event and - * updates the state of the station. - */ -static u16 llc_station_next_state(struct sk_buff *skb) -{ - u16 rc = 1; - struct llc_station_state_trans *trans; - - trans = llc_find_station_trans(skb); - if (trans) - /* got the state to which we next transition; perform the - * actions associated with this transition before actually - * transitioning to the next state - */ - rc = llc_exec_station_trans_actions(trans, skb); - else - /* event not recognized in current state; re-queue it for - * processing again at a later time; return failure - */ - rc = 0; - llc_station_free_ev(skb); - return rc; -} - -/** - * llc_station_service_events - service events in the queue - * - * Get an event from the station event queue (if any); attempt to service - * the event; if event serviced, get the next event (if any) on the event - * queue; if event not service, re-queue the event on the event queue and - * attempt to service the next event; when serviced all events in queue, - * finished; if don't transition to different state, just service all - * events once; if transition to new state, service all events again. - * Caller must hold llc_main_station.ev_q.lock. - */ -static void llc_station_service_events(void) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(&llc_main_station.ev_q.list)) != NULL) - llc_station_next_state(skb); -} - -/** - * llc_station_state_process - queue event and try to process queue. - * @skb: Address of the event - * - * Queues an event (on the station event queue) for handling by the - * station state machine and attempts to process any queued-up events. - */ -static void llc_station_state_process(struct sk_buff *skb) -{ - spin_lock_bh(&llc_main_station.ev_q.lock); - skb_queue_tail(&llc_main_station.ev_q.list, skb); - llc_station_service_events(); - spin_unlock_bh(&llc_main_station.ev_q.lock); -} - /** * llc_station_rcv - send received pdu to the station state machine * @skb: received frame. @@ -296,14 +218,17 @@ static void llc_station_state_process(struct sk_buff *skb) */ static void llc_station_rcv(struct sk_buff *skb) { - llc_station_state_process(skb); + struct llc_station_state_trans *trans; + + trans = llc_find_station_trans(skb); + if (trans) + llc_exec_station_trans_actions(trans, skb); + kfree_skb(skb); } void __init llc_station_init(void) { skb_queue_head_init(&llc_main_station.mac_pdu_q); - skb_queue_head_init(&llc_main_station.ev_q.list); - spin_lock_init(&llc_main_station.ev_q.lock); llc_set_station_handler(llc_station_rcv); } -- cgit v1.2.1 From 5ecf9eea2660c4fe894fabd3c3d0b64860fb0160 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:11:32 +0000 Subject: llc2: Remove the station send queue We only ever put one skb on the send queue, and then immediately send it. Remove the queue and call dev_queue_xmit() directly. This leaves struct llc_station empty, so remove that as well. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 3bdb888f8501..48c21184bf2c 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -25,19 +25,6 @@ #include #include -/** - * struct llc_station - LLC station component - * - * SAP and connection resource manager, one per adapter. - * - * @mac_sa: MAC source address - * @sap_list: list of related SAPs - * @mac_pdu_q: PDUs ready to send to MAC - */ -struct llc_station { - struct sk_buff_head mac_pdu_q; -}; - typedef int (*llc_station_ev_t)(struct sk_buff *skb); typedef int (*llc_station_action_t)(struct sk_buff *skb); @@ -48,8 +35,6 @@ struct llc_station_state_trans { llc_station_action_t *ev_actions; }; -static struct llc_station llc_main_station; - static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); @@ -70,20 +55,6 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) !pdu->dsap ? 0 : 1; /* NULL DSAP */ } -/** - * llc_station_send_pdu - queues PDU to send - * @skb: Address of the PDU - * - * Queues a PDU to send to the MAC layer. - */ -static void llc_station_send_pdu(struct sk_buff *skb) -{ - skb_queue_tail(&llc_main_station.mac_pdu_q, skb); - while ((skb = skb_dequeue(&llc_main_station.mac_pdu_q)) != NULL) - if (dev_queue_xmit(skb)) - break; -} - static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; @@ -101,7 +72,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; - llc_station_send_pdu(nskb); + dev_queue_xmit(nskb); out: return rc; free: @@ -130,7 +101,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; - llc_station_send_pdu(nskb); + dev_queue_xmit(nskb); out: return rc; free: @@ -228,7 +199,6 @@ static void llc_station_rcv(struct sk_buff *skb) void __init llc_station_init(void) { - skb_queue_head_init(&llc_main_station.mac_pdu_q); llc_set_station_handler(llc_station_rcv); } -- cgit v1.2.1 From da3188801898f2fb8859c232554b100f2a0250f8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:11:40 +0000 Subject: llc2: Remove explicit indexing of state action arrays These arrays are accessed by iteration in llc_exec_station_trans_actions(). There must not be any zero-filled gaps in them, so the explicit indices are pointless. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 48c21184bf2c..fe43158e06de 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -111,8 +111,8 @@ free: /* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ static llc_station_action_t llc_stat_up_state_actions_2[] = { - [0] = llc_station_ac_send_xid_r, - [1] = NULL, + llc_station_ac_send_xid_r, + NULL, }; static struct llc_station_state_trans llc_stat_up_state_trans_2 = { @@ -122,8 +122,8 @@ static struct llc_station_state_trans llc_stat_up_state_trans_2 = { /* state transition for LLC_STATION_EV_RX_NULL_DSAP_TEST_C event */ static llc_station_action_t llc_stat_up_state_actions_3[] = { - [0] = llc_station_ac_send_test_r, - [1] = NULL, + llc_station_ac_send_test_r, + NULL, }; static struct llc_station_state_trans llc_stat_up_state_trans_3 = { -- cgit v1.2.1 From 12ebc8b9af7e29ff4dc77ee0e73a6b1de513d659 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 15 Sep 2012 17:11:47 +0000 Subject: llc2: Collapse remainder of state machine into simple if-else if-statement Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 91 +++------------------------------------------------ 1 file changed, 4 insertions(+), 87 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index fe43158e06de..204a8351efff 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -25,16 +25,6 @@ #include #include -typedef int (*llc_station_ev_t)(struct sk_buff *skb); - -typedef int (*llc_station_action_t)(struct sk_buff *skb); - -/* Station component state table structure */ -struct llc_station_state_trans { - llc_station_ev_t ev; - llc_station_action_t *ev_actions; -}; - static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); @@ -109,78 +99,6 @@ free: goto out; } -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ -static llc_station_action_t llc_stat_up_state_actions_2[] = { - llc_station_ac_send_xid_r, - NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_2 = { - .ev = llc_stat_ev_rx_null_dsap_xid_c, - .ev_actions = llc_stat_up_state_actions_2, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_TEST_C event */ -static llc_station_action_t llc_stat_up_state_actions_3[] = { - llc_station_ac_send_test_r, - NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_3 = { - .ev = llc_stat_ev_rx_null_dsap_test_c, - .ev_actions = llc_stat_up_state_actions_3, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_up_state_trans [] = { - &llc_stat_up_state_trans_2, - &llc_stat_up_state_trans_3, - NULL, -}; - -/** - * llc_exec_station_trans_actions - executes actions for transition - * @trans: Address of the transition - * @skb: Address of the event that caused the transition - * - * Executes actions of a transition of the station state machine. Returns - * 0 if all actions complete successfully, nonzero otherwise. - */ -static u16 llc_exec_station_trans_actions(struct llc_station_state_trans *trans, - struct sk_buff *skb) -{ - u16 rc = 0; - llc_station_action_t *next_action = trans->ev_actions; - - for (; next_action && *next_action; next_action++) - if ((*next_action)(skb)) - rc = 1; - return rc; -} - -/** - * llc_find_station_trans - finds transition for this event - * @skb: Address of the event - * - * Search thru events of the current state of the station until list - * exhausted or it's obvious that the event is not valid for the current - * state. Returns the address of the transition if cound, %NULL otherwise. - */ -static struct llc_station_state_trans * - llc_find_station_trans(struct sk_buff *skb) -{ - int i = 0; - struct llc_station_state_trans *rc = NULL; - struct llc_station_state_trans **next_trans; - - for (next_trans = llc_stat_up_state_trans; next_trans[i]; i++) - if (!next_trans[i]->ev(skb)) { - rc = next_trans[i]; - break; - } - return rc; -} - /** * llc_station_rcv - send received pdu to the station state machine * @skb: received frame. @@ -189,11 +107,10 @@ static struct llc_station_state_trans * */ static void llc_station_rcv(struct sk_buff *skb) { - struct llc_station_state_trans *trans; - - trans = llc_find_station_trans(skb); - if (trans) - llc_exec_station_trans_actions(trans, skb); + if (llc_stat_ev_rx_null_dsap_xid_c(skb)) + llc_station_ac_send_xid_r(skb); + else if (llc_stat_ev_rx_null_dsap_test_c(skb)) + llc_station_ac_send_test_r(skb); kfree_skb(skb); } -- cgit v1.2.1 From b4516a288e71c64d7e214902250baf78b7b3cdcf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 17 Sep 2012 13:13:24 -0400 Subject: llc: Remove stray reference to sysctl_llc_station_ack_timeout. Signed-off-by: David S. Miller --- net/llc/sysctl_net_llc.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index d75306b9c2f3..612a5ddaf93b 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -47,13 +47,6 @@ static struct ctl_table llc2_timeout_table[] = { }; static struct ctl_table llc_station_table[] = { - { - .procname = "ack_timeout", - .data = &sysctl_llc_station_ack_timeout, - .maxlen = sizeof(long), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { }, }; -- cgit v1.2.1 From e1760bd5ffae8cb98cffb030ee8e631eba28f3d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 22:39:43 -0700 Subject: userns: Convert the audit loginuid to be a kuid Always store audit loginuids in type kuid_t. Print loginuids by converting them into uids in the appropriate user namespace, and then printing the resulting uid. Modify audit_get_loginuid to return a kuid_t. Modify audit_set_loginuid to take a kuid_t. Modify /proc//loginuid on read to convert the loginuid into the user namespace of the opener of the file. Modify /proc//loginud on write to convert the loginuid rom the user namespace of the opener of the file. Cc: Al Viro Cc: Eric Paris Cc: Paul Moore ? Cc: David Miller Signed-off-by: Eric W. Biederman --- net/core/dev.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlabel/netlabel_user.c | 2 +- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 12 ++++++------ 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 026bb4a37665..1c0d0823a5a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4524,7 +4524,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), - audit_get_loginuid(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, uid), from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..729a345c75a4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ security_task_getsecid(current, &audit_info.secid); - audit_info.loginuid = 0; + audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9fae63f10298..9650c4ad5f88 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -109,7 +109,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", - audit_info->loginuid, + from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); if (audit_info->secid != 0 && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..2f475151cea1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2630,12 +2630,12 @@ static void xfrm_policy_fini(struct net *net) flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); @@ -2742,7 +2742,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2757,7 +2757,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..fce6a49bc7c6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2045,7 +2045,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); @@ -2112,7 +2112,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2127,7 +2127,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..9ea55db737b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -575,7 +575,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -654,7 +654,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1369,7 +1369,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1624,7 +1624,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1918,7 +1918,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1961,7 +1961,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, current->pid); if (ue->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; -- cgit v1.2.1 From f3baed51f44dd6b6c7076d2de24d8aed291d8130 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 17 Sep 2012 11:54:15 +0100 Subject: wireless: remove unreachable code The only case where intersected_rd can become non NULL is within an if. All paths from that if return, so the end chunk has therefore squawked its last and is no more. Signed-off-by: Alan Cox Signed-off-by: Johannes Berg --- net/wireless/reg.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1ad04e54014c..0ba3328dcc9a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2193,7 +2193,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) static int __set_regdom(const struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *intersected_rd = NULL; - struct cfg80211_registered_device *rdev = NULL; struct wiphy *request_wiphy; /* Some basic sanity checks first */ @@ -2305,24 +2304,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - if (!intersected_rd) - return -EINVAL; - - rdev = wiphy_to_dev(request_wiphy); - - rdev->country_ie_alpha2[0] = rd->alpha2[0]; - rdev->country_ie_alpha2[1] = rd->alpha2[1]; - rdev->env = last_request->country_ie_env; - - BUG_ON(intersected_rd == rd); - - kfree(rd); - rd = NULL; - - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; - - return 0; + return -EINVAL; } -- cgit v1.2.1 From ed44a951c72ab409f932b1c15914488308e86da2 Mon Sep 17 00:00:00 2001 From: Pandiyarajan Pitchaimuthu Date: Tue, 18 Sep 2012 16:50:49 +0530 Subject: cfg80211/nl80211: Notify connection request failure in AP mode In AP mode, when a station requests connection to an AP and if the request is failed for particular reason, userspace is notified about the failure through NL80211_CMD_CONN_FAILED command. Reason for the failure is sent through the attribute NL80211_ATTR_CONN_FAILED_REASON. Signed-off-by: Pandiyarajan Pitchaimuthu Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 11 +++++++++++ net/wireless/nl80211.c | 34 ++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 +++++ 3 files changed, 50 insertions(+) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8fd0242ee169..3df195a3e336 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -612,6 +612,17 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) } EXPORT_SYMBOL(cfg80211_del_sta); +void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp); +} +EXPORT_SYMBOL(cfg80211_conn_failed); + struct cfg80211_mgmt_registration { struct list_head list; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 222189b6ed53..f1047aea868a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8364,6 +8364,40 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) || + nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 9f2616fffb40..f6153516068c 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -91,6 +91,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, gfp_t gfp); +void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp); + int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, -- cgit v1.2.1 From bafa6d9d89072c1a18853afe9ee5de05c491c13a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 7 Sep 2012 00:45:29 +0000 Subject: ipv4/route: arg delay is useless in rt_cache_flush() Since route cache deletion (89aef8921bfbac22f), delay is no more used. Remove it. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_frontend.c | 20 ++++++++++---------- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_trie.c | 6 +++--- net/ipv4/route.c | 19 +++---------------- 6 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 77e87aff419a..47800459e4cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef7..9b55b6f5a585 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3fba792..8e2b475da9fa 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; case NETDEV_UNREGISTER_BATCH: break; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e498d2..274309d3aded 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 57bd978483e1..d1b93595b4a7 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 82cf2a722b23..f6436d3b207a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net) atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) +void rt_cache_flush(struct net *net) { rt_cache_invalidate(net); } @@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } -- cgit v1.2.1 From 2885da72966fcb89f48d554339d347fb02b5ea78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Sep 2012 22:27:11 +0200 Subject: net: rt_cache_flush() cleanup We dont use jhash anymore since route cache removal, so we can get rid of get_random_bytes() calls for rt_genid changes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6436d3b207a..be27cfa96e88 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -447,23 +447,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) -{ - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - void rt_cache_flush(struct net *net) { - rt_cache_invalidate(net); + atomic_inc(&net->ipv4.rt_genid); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2520,8 +2506,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->ipv4.rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.2.1 From b42664f898c976247f7f609b8bb9c94d7475ca10 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:44 +0000 Subject: netns: move net->ipv4.rt_genid to net->rt_genid This commit prepares the use of rt_genid by both IPv4 and IPv6. Initialization is left in IPv4 part. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/route.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index be27cfa96e88..fd9af60397b5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -449,7 +444,7 @@ static inline bool rt_is_expired(const struct rtable *rth) void rt_cache_flush(struct net *net) { - atomic_inc(&net->ipv4.rt_genid); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2506,7 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, 0); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.2.1 From ee8372dd1989287c5eedb69d44bac43f69e496f1 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:45 +0000 Subject: xfrm: invalidate dst on policy insertion/deletion When a policy is inserted or deleted, all dst should be recalculated. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5a2aa17e4d3c..ab2ce7d5152d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -585,6 +585,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); + rt_genid_bump(net); if (delpol) __xfrm_policy_unlink(delpol, dir); policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); -- cgit v1.2.1 From 6f3118b571b8a4c06c7985dc3172c3526cb86253 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:46 +0000 Subject: ipv6: use net->rt_genid to check dst validity IPv6 dst should take care of rt_genid too. When a xfrm policy is inserted or deleted, all dst should be invalidated. To force the validation, dst entries should be created with ->obsolete set to DST_OBSOLETE_FORCE_CHK. This was already the case for all functions calling ip6_dst_alloc(), except for ip6_rt_copy(). As a consequence, we can remove the specific code in inet6_connection_sock. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 23 +---------------------- net/ipv6/route.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0251a6005be8..c4f934176cab 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); - -#ifdef CONFIG_XFRM - { - struct rt6_info *rt = (struct rt6_info *)dst; - rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); - } -#endif } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst; - - dst = __sk_dst_check(sk, cookie); - -#ifdef CONFIG_XFRM - if (dst) { - struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - __sk_dst_reset(sk); - dst = NULL; - } - } -#endif - - return dst; + return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd279100..fb29e2215a19 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_NONE, flags); + 0, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + rt->rt6i_genid = rt_genid(net); } return rt; } @@ -1031,6 +1032,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; + /* All IPV6 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + */ + if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) + return NULL; + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { if (!rt6_has_peer(rt)) @@ -1397,8 +1405,6 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->dst.obsolete = -1; - if (cfg->fc_flags & RTF_EXPIRES) rt6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -2080,7 +2086,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; - rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) -- cgit v1.2.1 From 2c20cbd7e3aa6e9dddc07975d3f3a89fe1f69c00 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:47 +0000 Subject: ipv6: use DST_* macro to set obselete field Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb29e2215a19..854e4018d205 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, @@ -246,7 +246,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, @@ -261,7 +261,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard, -- cgit v1.2.1 From 4c835019a6632b9800e23d9d281b2733ed0ecbb2 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 18 Sep 2012 07:10:43 +0000 Subject: net/ieee802154/6lowpan.c: Remove unecessary semicolon Found by http://coccinelle.lip6.fr/ Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index d5291113584f..6d42c17af96b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1256,7 +1256,7 @@ static int lowpan_device_event(struct notifier_block *unused, } unregister_netdevice_many(&del_list); - }; + } out: return NOTIFY_DONE; -- cgit v1.2.1 From a2bf91b5b8de0be867d4ff3b2533c6449149098c Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 18 Sep 2012 07:10:44 +0000 Subject: net/openvswitch/vport.c: Remove unecessary semicolon Found by http://coccinelle.lip6.fr/ Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9055dd698c70..03779e8a2622 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -398,7 +398,7 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) case VPORT_E_TX_ERROR: vport->err_stats.tx_errors++; break; - }; + } spin_unlock(&vport->stats_lock); } -- cgit v1.2.1 From adccff34de1ef81564b7e6c436f762e7a1caf807 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 18 Sep 2012 07:10:45 +0000 Subject: net/tipc/name_table.c: Remove unecessary semicolon Found by http://coccinelle.lip6.fr/ Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- net/tipc/name_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 98975e80bb51..46754779fd3d 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -783,7 +783,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, if (!list_is_last(&publ->zone_list, &info->zone_list)) ret += tipc_snprintf(buf + ret, len - ret, "\n%33s", " "); - }; + } ret += tipc_snprintf(buf + ret, len - ret, "\n"); return ret; -- cgit v1.2.1 From 864745d291b5ba80ea0bd0edcbe67273de368836 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 13 Sep 2012 11:41:26 +0000 Subject: xfrm_user: return error pointer instead of NULL When dump_one_state() returns an error, e.g. because of a too small buffer to dump the whole xfrm state, xfrm_state_netlink() returns NULL instead of an error pointer. But its callers expect an error pointer and therefore continue to operate on a NULL skbuff. This could lead to a privilege escalation (execution of user code in kernel context) if the attacker has CAP_NET_ADMIN and is able to map address 0. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..dac08e2a5a93 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -878,6 +878,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -888,9 +889,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.2.1 From c25463722509fef0ed630b271576a8c9a70236f3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 14 Sep 2012 09:58:32 +0000 Subject: xfrm_user: return error pointer instead of NULL #2 When dump_one_policy() returns an error, e.g. because of a too small buffer to dump the whole xfrm policy, xfrm_policy_netlink() returns NULL instead of an error pointer. But its caller expects an error pointer and therefore continues to operate on a NULL skbuff. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac08e2a5a93..d12b62547ad0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1548,6 +1548,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1558,9 +1559,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.2.1 From 0e698bf6624c469cd4f3f391247b142963ca9c4e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 15 Sep 2012 22:44:16 +0000 Subject: net: fix memory leak on oom with zerocopy If orphan flags fails, we don't free the skb on receive, which leaks the skb memory. Return value was also wrong: netif_receive_skb is supposed to return NET_RX_DROP, not ENOMEM. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d7fe32c946c1..ac7609d85187 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3322,7 +3322,7 @@ ncls: if (pt_prev) { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - ret = -ENOMEM; + goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { -- cgit v1.2.1 From 1d57f19539c074105791da6384a8ad674bba8037 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Sep 2012 12:51:39 +0000 Subject: tcp: fix regression in urgent data handling Stephan Springl found that commit 1402d366019fed "tcp: introduce tcp_try_coalesce" introduced a regression for rlogin It turns out problem comes from TCP urgent data handling and a change in behavior in input path. rlogin sends two one-byte packets with URG ptr set, and when next data frame is coalesced, we lack sk_data_ready() calls to wakeup consumer. Signed-off-by: Eric Dumazet Reported-by: Stephan Springl Cc: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6e38c6c23caa..d377f4854cb8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4661,7 +4661,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); - else if (!sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } @@ -5556,8 +5556,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - else - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk, 0); return 0; } } -- cgit v1.2.1 From 433a19548061bb5457b6ab77ed7ea58ca6e43ddb Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2012 22:40:10 +0000 Subject: xfrm: fix a read lock imbalance in make_blackhole if xfrm_policy_get_afinfo returns 0, it has already released the read lock, xfrm_policy_put_afinfo should not be called again. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ab2ce7d5152d..387848e90078 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1764,7 +1764,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } -- cgit v1.2.1 From 3d1cbdd6aefff711bcf389fdabc4af9bc22e8201 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:08 +0200 Subject: Bluetooth: mgmt: Fix enabling SSP while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set SSP will only set dev_flags but won't write changes to controller. As a result remote devices won't use Secure Simple Pairing with our device due to SSP Host Support flag disabled in extended features and may also reject SSP attempt from our side (with possible fallback to legacy pairing). This patch ensures HCI Write Simple Pairing Mode is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d17ca6..f943bbfc9c61 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2875,6 +2875,12 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) if (scan) hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 ssp = 1; + + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); -- cgit v1.2.1 From 562fcc246ebe31ade6e1be08585673b9b2785498 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:09 +0200 Subject: Bluetooth: mgmt: Fix enabling LE while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set LE will only set dev_flags but won't write changes to controller. As a result it's not possible to start device discovery session on LE controller as it uses interleaved discovery which requires LE Supported Host flag in extended features. This patch ensures HCI Write LE Host Supported is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f943bbfc9c61..eba022de3c20 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2881,6 +2881,16 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); } + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR); + + hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); -- cgit v1.2.1 From aad3d0e343900a4c2c5dbc73f76550aa64a0ac1b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:42 +0300 Subject: Bluetooth: Fix freeing uninitialized delayed works When releasing L2CAP socket which is in BT_CONFIG state l2cap_chan_close invokes l2cap_send_disconn_req which cancel delayed works which are only set in BT_CONNECTED state with l2cap_ertm_init. Add state check before cancelling those works. ... [ 9668.574372] [21085] l2cap_sock_release: sock cd065200, sk f073e800 [ 9668.574399] [21085] l2cap_sock_shutdown: sock cd065200, sk f073e800 [ 9668.574411] [21085] l2cap_chan_close: chan f073ec00 state BT_CONFIG sk f073e800 [ 9668.574421] [21085] l2cap_send_disconn_req: chan f073ec00 conn ecc16600 [ 9668.574441] INFO: trying to register non-static key. [ 9668.574443] the code is fine but needs lockdep annotation. [ 9668.574446] turning off the locking correctness validator. [ 9668.574450] Pid: 21085, comm: obex-client Tainted: G O 3.5.0+ #57 [ 9668.574452] Call Trace: [ 9668.574463] [] __lock_acquire+0x12e3/0x1700 [ 9668.574468] [] ? trace_hardirqs_on+0xb/0x10 [ 9668.574476] [] ? printk+0x4d/0x4f [ 9668.574479] [] lock_acquire+0x88/0x130 [ 9668.574487] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574491] [] del_timer_sync+0x50/0xc0 [ 9668.574495] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574515] [] l2cap_send_disconn_req+0xe3/0x160 [bluetooth] ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4ea1710a4783..38c00f142203 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1008,7 +1008,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c if (!conn) return; - if (chan->mode == L2CAP_MODE_ERTM) { + if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); -- cgit v1.2.1 From 78c04c0bf52360dc2f7185e99c8e9aa05d73ae5a Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 14 Sep 2012 16:34:46 -0300 Subject: Bluetooth: Fix not removing power_off delayed work For example, when a usb reset is received (I could reproduce it running something very similar to this[1] in a loop) it could be that the device is unregistered while the power_off delayed work is still scheduled to run. Backtrace: WARNING: at lib/debugobjects.c:261 debug_print_object+0x7c/0x8d() Hardware name: To Be Filled By O.E.M. ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x26 Modules linked in: nouveau mxm_wmi btusb wmi bluetooth ttm coretemp drm_kms_helper Pid: 2114, comm: usb-reset Not tainted 3.5.0bt-next #2 Call Trace: [] ? free_obj_work+0x57/0x91 [] warn_slowpath_common+0x7e/0x97 [] warn_slowpath_fmt+0x41/0x43 [] debug_print_object+0x7c/0x8d [] ? __queue_work+0x259/0x259 [] ? debug_check_no_obj_freed+0x6f/0x1b5 [] debug_check_no_obj_freed+0x98/0x1b5 [] ? bt_host_release+0x10/0x1e [bluetooth] [] kfree+0x90/0xe6 [] bt_host_release+0x10/0x1e [bluetooth] [] device_release+0x4a/0x7e [] kobject_release+0x11d/0x154 [] kobject_put+0x4a/0x4f [] put_device+0x12/0x14 [] hci_free_dev+0x22/0x26 [bluetooth] [] btusb_disconnect+0x96/0x9f [btusb] [] usb_unbind_interface+0x57/0x106 [] __device_release_driver+0x83/0xd6 [] device_release_driver+0x20/0x2d [] usb_driver_release_interface+0x44/0x7b [] usb_forced_unbind_intf+0x45/0x4e [] usb_reset_device+0xa6/0x12e [] usbdev_do_ioctl+0x319/0xe20 [] ? avc_has_perm_flags+0xc9/0x12e [] ? avc_has_perm_flags+0x25/0x12e [] ? do_page_fault+0x31e/0x3a1 [] usbdev_ioctl+0x9/0xd [] vfs_ioctl+0x21/0x34 [] do_vfs_ioctl+0x408/0x44b [] ? file_has_perm+0x76/0x81 [] sys_ioctl+0x51/0x76 [] system_call_fastpath+0x16/0x1b [1] http://cpansearch.perl.org/src/DPAVLIN/Biblio-RFID-0.03/examples/usbreset.c Signed-off-by: Vinicius Costa Gomes Cc: stable@vger.kernel.org Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5a..0b997c8f9655 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -734,6 +734,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_work_sync(&hdev->le_scan); + cancel_delayed_work(&hdev->power_off); + hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); -- cgit v1.2.1 From a85d0d7f3460b1a123b78e7f7e39bf72c37dfb78 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 14 Sep 2012 15:36:57 -0700 Subject: cfg80211: fix possible circular lock on reg_regdb_search() When call_crda() is called we kick off a witch hunt search for the same regulatory domain on our internal regulatory database and that work gets kicked off on a workqueue, this is done while the cfg80211_mutex is held. If that workqueue kicks off it will first lock reg_regdb_search_mutex and later cfg80211_mutex but to ensure two CPUs will not contend against cfg80211_mutex the right thing to do is to have the reg_regdb_search() wait until the cfg80211_mutex is let go. The lockdep report is pasted below. cfg80211: Calling CRDA to update world regulatory domain ====================================================== [ INFO: possible circular locking dependency detected ] 3.3.8 #3 Tainted: G O ------------------------------------------------------- kworker/0:1/235 is trying to acquire lock: (cfg80211_mutex){+.+...}, at: [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] but task is already holding lock: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (reg_regdb_search_mutex){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<81645778>] is_world_regdom+0x9f8/0xc74 [cfg80211] -> #1 (reg_mutex#2){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<8164539c>] is_world_regdom+0x61c/0xc74 [cfg80211] -> #0 (cfg80211_mutex){+.+...}: [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] other info that might help us debug this: Chain exists of: cfg80211_mutex --> reg_mutex#2 --> reg_regdb_search_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reg_regdb_search_mutex); lock(reg_mutex#2); lock(reg_regdb_search_mutex); lock(cfg80211_mutex); *** DEADLOCK *** 3 locks held by kworker/0:1/235: #0: (events){.+.+..}, at: [<80089a00>] process_one_work+0x230/0x460 #1: (reg_regdb_work){+.+...}, at: [<80089a00>] process_one_work+0x230/0x460 #2: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] stack backtrace: Call Trace: [<80290fd4>] dump_stack+0x8/0x34 [<80291bc4>] print_circular_bug+0x2ac/0x2d8 [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] Reported-by: Felix Fietkau Tested-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2ded3c7fad06..72d170ca3406 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); -- cgit v1.2.1 From 92a25256f142d55e25f9959441cea6ddeabae57e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 6 Sep 2012 18:39:26 +0300 Subject: Bluetooth: mgmt: Implement support for passkey notification This patch adds support for Secure Simple Pairing with devices that have KeyboardOnly as their IO capability. Such devices will cause a passkey notification on our side and optionally also keypress notifications. Without this patch some keyboards cannot be paired using the mgmt interface. Signed-off-by: Johan Hedberg Cc: stable@vger.kernel.org Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 17 ++++++++++++ 2 files changed, 84 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 48d730228c2f..ccca88fc6195 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3263,6 +3263,65 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); } +static void hci_user_passkey_notify_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_passkey_notify *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + return; + + conn->passkey_notify = __le32_to_cpu(ev->passkey); + conn->passkey_entered = 0; + + if (test_bit(HCI_MGMT, &hdev->dev_flags)) + mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, + conn->dst_type, conn->passkey_notify, + conn->passkey_entered); +} + +static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_keypress_notify *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + return; + + switch (ev->type) { + case HCI_KEYPRESS_STARTED: + conn->passkey_entered = 0; + return; + + case HCI_KEYPRESS_ENTERED: + conn->passkey_entered++; + break; + + case HCI_KEYPRESS_ERASED: + conn->passkey_entered--; + break; + + case HCI_KEYPRESS_CLEARED: + conn->passkey_entered = 0; + break; + + case HCI_KEYPRESS_COMPLETED: + return; + } + + if (test_bit(HCI_MGMT, &hdev->dev_flags)) + mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, + conn->dst_type, conn->passkey_notify, + conn->passkey_entered); +} + static void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { @@ -3627,6 +3686,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_user_passkey_request_evt(hdev, skb); break; + case HCI_EV_USER_PASSKEY_NOTIFY: + hci_user_passkey_notify_evt(hdev, skb); + break; + + case HCI_EV_KEYPRESS_NOTIFY: + hci_keypress_notify_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 05d4b83a0189..8e1ab59a9cef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -99,6 +99,7 @@ static const u16 mgmt_events[] = { MGMT_EV_DEVICE_BLOCKED, MGMT_EV_DEVICE_UNBLOCKED, MGMT_EV_DEVICE_UNPAIRED, + MGMT_EV_PASSKEY_NOTIFY, }; /* @@ -3276,6 +3277,22 @@ int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, MGMT_OP_USER_PASSKEY_NEG_REPLY); } +int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 link_type, u8 addr_type, u32 passkey, + u8 entered) +{ + struct mgmt_ev_passkey_notify ev; + + BT_DBG("%s", hdev->name); + + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_bdaddr(link_type, addr_type); + ev.passkey = __cpu_to_le32(passkey); + ev.entered = entered; + + return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL); +} + int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { -- cgit v1.2.1 From 23b3b1330abc643e1fbb7cfffcb6947e2583cff2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 6 Sep 2012 18:39:27 +0300 Subject: Bluetooth: Update management interface revision For each kernel release where commands or events are added to the management interface, the revision field should be increment by one. The increment should only happen once per kernel release and not for every command/event that gets added. The revision value is for informational purposes only, but this simple policy would make any future debugging a lot simple. Signed-off-by: Johan Hedberg Cc: stable@vger.kernel.org Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8e1ab59a9cef..8934343be0ea 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 1 +#define MGMT_REVISION 2 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit v1.2.1 From 2ad4814fb6ddc78ec17079b5dec9e2cd313a944c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Sep 2012 08:20:24 +0200 Subject: mac80211: make reset debugfs depend on CONFIG_PM The suspend/resume code depends on CONFIG_PM, so the reset debugfs file can only be made available if that is enabled. Fengguang Wu's zero-day build testing found this. Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 97173f8144d4..466f4b45dd94 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -70,6 +70,7 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); +#ifdef CONFIG_PM static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { @@ -88,6 +89,7 @@ static const struct file_operations reset_ops = { .open = simple_open, .llseek = noop_llseek, }; +#endif static ssize_t hwflags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -245,7 +247,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(queues); +#ifdef CONFIG_PM DEBUGFS_ADD_MODE(reset, 0200); +#endif DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); -- cgit v1.2.1 From 552bff0c2fec8953ba3793d75ee335032cc0b47c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Sep 2012 09:26:06 +0200 Subject: cfg80211: constify name parameter to add_virtual_intf The name can't be modified by the driver, make it const. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9bd56a744982..05f3a313db88 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -20,7 +20,8 @@ #include "rate.h" #include "mesh.h" -static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name, +static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, + const char *name, enum nl80211_iftype type, u32 *flags, struct vif_params *params) -- cgit v1.2.1 From b40863c667c16b7a73d4f034a8eab67029b5b15a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Sep 2012 20:44:49 +0000 Subject: net: more accurate network taps in transmit path dev_queue_xmit_nit() should be called right before ndo_start_xmit() calls or we might give wrong packet contents to taps users : Packet checksum can be changed, or packet can be linearized or segmented, and segments partially sent for the later case. Also a memory allocation can fail and packet never really hit the driver entry point. Reported-by: Jamie Gloudon Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index dcc673d0674c..52cd1d7f004a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2213,9 +2213,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2250,6 +2247,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); @@ -2272,6 +2272,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(nskb, dev); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); -- cgit v1.2.1 From 3fd91fb35847a8b3287f00970efc069de16df8b4 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 13 Sep 2012 19:54:57 +0000 Subject: ipv6: recursive check rt->dst.from when call rt6_check_expired If dst cache dst_a copies from dst_b, and dst_b copies from dst_c, check if dst_a is expired or not, we should not end with dst_a->dst.from, dst_b, we should check dst_c. CC: Gao feng Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 83dafa528936..0607ee3a0eac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,15 +369,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static bool rt6_check_expired(const struct rt6_info *rt) { - struct rt6_info *ort = NULL; - if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - ort = (struct rt6_info *) rt->dst.from; - return (ort->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, ort->dst.expires); + return rt6_check_expired((struct rt6_info *) rt->dst.from); } return false; } -- cgit v1.2.1 From 828de4f6bf6e785f7b5497f8f7cfd4d4fbcfdb7e Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 13 Sep 2012 20:58:27 +0000 Subject: net: dev: fix incorrect getting net device's name When moving a nic from net namespace A to net namespace B, in dev_change_net_namesapce,we call __dev_get_by_name to decide if the netns B has the device has the same name. if the netns B already has the same named device,we call dev_get_valid_name to try to get a valid name for this nic in the netns B,but net_device->nd_net still point to netns A now. this patch fix it. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/dev.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 52cd1d7f004a..bb42eb161969 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net_device *dev, const char *name) +static int dev_alloc_name_ns(struct net *net, + struct net_device *dev, + const char *name) { - struct net *net; + char buf[IFNAMSIZ]; + int ret; - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + +static int dev_get_valid_name(struct net *net, + struct net_device *dev, + const char *name) +{ + BUG_ON(!net); if (!dev_valid_name(name)) return -EINVAL; if (strchr(name, '%')) - return dev_alloc_name(dev, name); + return dev_alloc_name_ns(net, dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; else if (dev->name != name) @@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(dev, newname); + err = dev_get_valid_name(net, dev, newname); if (err < 0) return err; @@ -5589,7 +5601,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = dev_get_valid_name(dev, dev->name); + ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -6233,7 +6245,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(dev, pat) < 0) + if (dev_get_valid_name(net, dev, pat) < 0) goto out; } -- cgit v1.2.1 From 2c60db037034d27f8c636403355d52872da92f81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Sep 2012 09:17:26 +0000 Subject: net: provide a default dev->ethtool_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of forcing device drivers to provide empty ethtool_ops or tweak net/core/ethtool.c again, we could provide a generic ethtool_ops. This occurred to me when I wanted to add GSO support to GRE tunnels. ethtool -k support should be generic for all drivers. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Maciej Żenczykowski Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ net/core/ethtool.c | 12 ------------ 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bb42eb161969..bbda81997f4f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5974,6 +5974,8 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) return queue; } +static const struct ethtool_ops default_ethtool_ops; + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6061,6 +6063,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, strcpy(dev->name, name); dev->group = INIT_NETDEV_GROUP; + if (!dev->ethtool_ops) + dev->ethtool_ops = &default_ethtool_ops; return dev; free_all: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index cbf033dcaf1f..4d64cc2e3fa9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1426,18 +1426,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) return -EFAULT; - if (!dev->ethtool_ops) { - /* A few commands do not require any driver support, - * are unprivileged, and do not change anything, so we - * can take a shortcut to them. */ - if (ethcmd == ETHTOOL_GDRVINFO) - return ethtool_get_drvinfo(dev, useraddr); - else if (ethcmd == ETHTOOL_GET_TS_INFO) - return ethtool_get_ts_info(dev, useraddr); - else - return -EOPNOTSUPP; - } - /* Allow some commands to be done by anyone */ switch (ethcmd) { case ETHTOOL_GSET: -- cgit v1.2.1 From 6b78f16e4bdde3936b11690bdb970be423e07a07 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2012 21:25:33 +0000 Subject: gre: add GSO support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add GSO support to GRE tunnels. Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b062a98574f2..f233c1da2077 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -745,6 +745,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + goto tx_error; + if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -1296,6 +1300,11 @@ static void ipgre_dev_free(struct net_device *dev) free_netdev(dev); } +#define GRE_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; @@ -1309,6 +1318,9 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_tunnel_init(struct net_device *dev) -- cgit v1.2.1 From dbd6b11e15a2f96030da17dbeda943a8a98ee990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 14 Sep 2012 00:40:54 +0000 Subject: batman-adv: make batadv_test_bit() return 0 or 1 only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some architectures test_bit() can return other values than 0 or 1: With a generic x86 OpenWrt image in a kvm setup (batadv_)test_bit() frequently returns -1 for me, leading to batadv_iv_ogm_update_seqnos() wrongly signaling a protected seqno window. This patch tries to fix this issue by making batadv_test_bit() return 0 or 1 only. Signed-off-by: Linus Lüssing Acked-by: Sven Eckelmann Signed-off-by: Antonio Quartulli Signed-off-by: David S. Miller --- net/batman-adv/bitarray.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a081ce1c0514..cebaae7e148b 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -20,8 +20,8 @@ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ #define _NET_BATMAN_ADV_BITARRAY_H_ -/* returns true if the corresponding bit in the given seq_bits indicates true - * and curr_seqno is within range of last_seqno +/* Returns 1 if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, uint32_t last_seqno, uint32_t curr_seqno) @@ -32,7 +32,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; else - return test_bit(diff, seq_bits); + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ -- cgit v1.2.1 From 15c041759bfcd9ab0a4e43f1c16e2644977d0467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Fri, 14 Sep 2012 04:59:52 +0000 Subject: tcp: flush DMA queue before sk_wait_data if rcv_wnd is zero If recv() syscall is called for a TCP socket so that - IOAT DMA is used - MSG_WAITALL flag is used - requested length is bigger than sk_rcvbuf - enough data has already arrived to bring rcv_wnd to zero then when tcp_recvmsg() gets to calling sk_wait_data(), receive window can be still zero while sk_async_wait_queue exhausts enough space to keep it zero. As this queue isn't cleaned until the tcp_service_net_dma() call, sk_wait_data() cannot receive any data and blocks forever. If zero receive window and non-empty sk_async_wait_queue is detected before calling sk_wait_data(), process the queue first. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1daf..bf9a8ab29459 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1762,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ -- cgit v1.2.1 From 71261956973ba9e0637848a5adb4a5819b4bae83 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 15 Sep 2012 00:41:35 +0000 Subject: pkt_sched: fix virtual-start-time update in QFQ If the old timestamps of a class, say cl, are stale when the class becomes active, then QFQ may assign to cl a much higher start time than the maximum value allowed. This may happen when QFQ assigns to the start time of cl the finish time of a group whose classes are characterized by a higher value of the ratio max_class_pkt/weight_of_the_class with respect to that of cl. Inserting a class with a too high start time into the bucket list corrupts the data structure and may eventually lead to crashes. This patch limits the maximum start time assigned to a class. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e4723d31fdd5..211a21217045 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -865,7 +865,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } -- cgit v1.2.1 From 6b6e27255f29a6191ef8ad96bfcc392ab2ef6c71 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 17 Sep 2012 10:03:26 +0000 Subject: netdev: make address const in device address management The internal functions for add/deleting addresses don't change their argument. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 6 +++--- net/bridge/br_private.h | 4 ++-- net/core/dev_addr_lists.c | 40 +++++++++++++++++++++------------------- 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ddf93efc133c..02861190a3d4 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -609,7 +609,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr, u16 nlh_flags) + const unsigned char *addr, u16 nlh_flags) { struct net_bridge_port *p; int err = 0; @@ -639,7 +639,7 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev, return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr) +static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) { struct net_bridge *br = p->br; struct hlist_head *head = &br->hash[br_mac_hash(addr)]; @@ -655,7 +655,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr) /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr) + const unsigned char *addr) { struct net_bridge_port *p; int err; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f507d2af9646..11a984b87e62 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -363,10 +363,10 @@ extern void br_fdb_update(struct net_bridge *br, extern int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr); + const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 nlh_flags); extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index c4cc2bc49f06..87cc17db2d56 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,7 @@ */ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); } -static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, + const unsigned char *addr, int addr_len, + unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, return -ENOENT; } -static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, + const unsigned char *addr, int addr_len, + unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); } @@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init); * * The caller must hold the rtnl_mutex. */ -int dev_addr_add(struct net_device *dev, unsigned char *addr, +int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; @@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add); * * The caller must hold the rtnl_mutex. */ -int dev_addr_del(struct net_device *dev, unsigned char *addr, +int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; @@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple); * @dev: device * @addr: address to add */ -int dev_uc_add_excl(struct net_device *dev, unsigned char *addr) +int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { struct netdev_hw_addr *ha; int err; @@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl); * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ -int dev_uc_add(struct net_device *dev, unsigned char *addr) +int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; @@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add); * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ -int dev_uc_del(struct net_device *dev, unsigned char *addr) +int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; @@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init); * @dev: device * @addr: address to add */ -int dev_mc_add_excl(struct net_device *dev, unsigned char *addr) +int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { struct netdev_hw_addr *ha; int err; @@ -566,7 +568,7 @@ out: } EXPORT_SYMBOL(dev_mc_add_excl); -static int __dev_mc_add(struct net_device *dev, unsigned char *addr, +static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; @@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr, * Add a multicast address to the device or increase * the reference count if it already exists. */ -int dev_mc_add(struct net_device *dev, unsigned char *addr) +int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } @@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add); * * Add a global multicast address to the device. */ -int dev_mc_add_global(struct net_device *dev, unsigned char *addr) +int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); -static int __dev_mc_del(struct net_device *dev, unsigned char *addr, +static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; @@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr, * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ -int dev_mc_del(struct net_device *dev, unsigned char *addr) +int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } @@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del); * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ -int dev_mc_del_global(struct net_device *dev, unsigned char *addr) +int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } -- cgit v1.2.1 From 8c4c49df5cfeb8d56e5b85a430c8cbcb86c2ac37 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 17 Sep 2012 20:16:31 +0000 Subject: netpoll: call ->ndo_select_queue() in tx path In netpoll tx path, we miss the chance of calling ->ndo_select_queue(), thus could cause problems when bonding is involved. This patch makes dev_pick_tx() extern (and rename it to netdev_pick_tx()) to let netpoll call it in netpoll_send_skb_on_dev(). Reported-by: Sylvain Munaut Cc: "David S. Miller" Cc: Eric Dumazet Signed-off-by: Cong Wang Tested-by: Sylvain Munaut Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- net/core/netpoll.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bbda81997f4f..707b12425a79 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2396,8 +2396,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) #endif } -static struct netdev_queue *dev_pick_tx(struct net_device *dev, - struct sk_buff *skb) +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) { int queue_index; const struct net_device_ops *ops = dev->netdev_ops; @@ -2571,7 +2571,7 @@ int dev_queue_xmit(struct sk_buff *skb) skb_update_prio(skb); - txq = dev_pick_tx(dev, skb); + txq = netdev_pick_tx(dev, skb); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/core/netpoll.c b/net/core/netpoll.c index dd67818025d1..77a0388fc3be 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -328,7 +328,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = netdev_pick_tx(dev, skb); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; -- cgit v1.2.1 From c038a767cd697238b09f7a4ea5a504b4891774e9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:08 +0000 Subject: ipv6: add a new namespace for nf_conntrack_reasm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed by Michal, it is necessary to add a new namespace for nf_conntrack_reasm code, this prepares for the second patch. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 137 ++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f94fb3ac2a79..f40f327ccc0c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -71,27 +71,26 @@ struct nf_ct_frag6_queue }; static struct inet_frags nf_frags; -static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_init_frags.timeout, + .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_init_frags.low_thresh, + .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_init_frags.high_thresh, + .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -99,7 +98,55 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { } }; -static struct ctl_table_header *nf_ct_frag6_sysctl_header; +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = nf_ct_frag6_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + } + + hdr = register_net_sysctl(net, "net/netfilter", table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +#else +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + return 0; +} +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ +} #endif static unsigned int nf_hashfn(struct inet_frag_queue *q) @@ -131,13 +178,6 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) inet_frag_kill(&fq->q, &nf_frags); } -static void nf_ct_frag6_evictor(void) -{ - local_bh_disable(); - inet_frag_evictor(&nf_init_frags, &nf_frags); - local_bh_enable(); -} - static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq; @@ -158,9 +198,9 @@ out: } /* Creation primitives. */ - -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) +static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,7 +214,7 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); - q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) goto oom; @@ -312,7 +352,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &nf_init_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -322,7 +362,7 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&nf_frags.lock); return 0; @@ -391,7 +431,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_init_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -415,7 +455,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &nf_init_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); head->local_df = 1; head->next = NULL; @@ -527,6 +567,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; + struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) + : dev_net(skb->dev); struct frag_hdr *fhdr; struct nf_ct_frag6_queue *fq; struct ipv6hdr *hdr; @@ -560,10 +602,13 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) - nf_ct_frag6_evictor(); + if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags); + local_bh_enable(); + } - fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -621,8 +666,31 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, nf_conntrack_put_reasm(skb); } +static int nf_ct_net_init(struct net *net) +{ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); +} + +static void nf_ct_net_exit(struct net *net) +{ + nf_ct_frags6_sysctl_unregister(net); + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +static struct pernet_operations nf_ct_net_ops = { + .init = nf_ct_net_init, + .exit = nf_ct_net_exit, +}; + int nf_ct_frag6_init(void) { + int ret = 0; + nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -631,32 +699,17 @@ int nf_ct_frag6_init(void) nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; - nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; - inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); -#ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", - nf_ct_frag6_sysctl_table); - if (!nf_ct_frag6_sysctl_header) { + ret = register_pernet_subsys(&nf_ct_net_ops); + if (ret) inet_frags_fini(&nf_frags); - return -ENOMEM; - } -#endif - return 0; + return ret; } void nf_ct_frag6_cleanup(void) { -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); - nf_ct_frag6_sysctl_header = NULL; -#endif + unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); - - nf_init_frags.low_thresh = 0; - nf_ct_frag6_evictor(); } -- cgit v1.2.1 From b836c99fd6c9dfe52a69fa0ba36ec918f80ce02a Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:09 +0000 Subject: ipv6: unify conntrack reassembly expire code with standard one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two years ago, Shan Wei tried to fix this: http://patchwork.ozlabs.org/patch/43905/ The problem is that RFC2460 requires an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded message should be sent to the source of that fragment, if the defragmentation times out. " If insufficient fragments are received to complete reassembly of a packet within 60 seconds of the reception of the first-arriving fragment of that packet, reassembly of that packet must be abandoned and all the fragments that have been received for that packet must be discarded. If the first fragment (i.e., the one with a Fragment Offset of zero) has been received, an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded message should be sent to the source of that fragment. " As Herbert suggested, we could actually use the standard IPv6 reassembly code which follows RFC2460. With this patch applied, I can see ICMP Time Exceeded sent from the receiver when the sender sent out 3/4 fragmented IPv6 UDP packet. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 74 +++++++++------------------------ net/ipv6/reassembly.c | 63 +++++++++------------------- 2 files changed, 38 insertions(+), 99 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f40f327ccc0c..54274c33a0b1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -57,19 +57,6 @@ struct nf_ct_frag6_skb_cb #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) -struct nf_ct_frag6_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL @@ -151,9 +138,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) static unsigned int nf_hashfn(struct inet_frag_queue *q) { - const struct nf_ct_frag6_queue *nq; + const struct frag_queue *nq; - nq = container_of(q, struct nf_ct_frag6_queue, q); + nq = container_of(q, struct frag_queue, q); return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } @@ -163,44 +150,21 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Destruction primitives. */ - -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) -{ - inet_frag_put(&fq->q, &nf_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) -{ - inet_frag_kill(&fq->q, &nf_frags); -} - static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq; - - fq = container_of((struct inet_frag_queue *)data, - struct nf_ct_frag6_queue, q); + struct frag_queue *fq; + struct net *net; - spin_lock(&fq->q.lock); + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, nf_frag.frags); - if (fq->q.last_in & INET_FRAG_COMPLETE) - goto out; - - fq_kill(fq); - -out: - spin_unlock(&fq->q.lock); - fq_put(fq); + ip6_expire_frag_queue(net, fq, &nf_frags); } /* Creation primitives. */ -static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, - u32 user, struct in6_addr *src, - struct in6_addr *dst) +static inline struct frag_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -219,14 +183,14 @@ static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, if (q == NULL) goto oom; - return container_of(q, struct nf_ct_frag6_queue, q); + return container_of(q, struct frag_queue, q); oom: return NULL; } -static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, +static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; @@ -367,7 +331,7 @@ found: return 0; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); err: return -1; } @@ -382,12 +346,12 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); @@ -570,7 +534,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); struct frag_hdr *fhdr; - struct nf_ct_frag6_queue *fq; + struct frag_queue *fq; struct ipv6hdr *hdr; int fhoff, nhoff; u8 prevhdr; @@ -619,7 +583,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); goto ret_orig; } @@ -631,7 +595,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) } spin_unlock_bh(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); return ret_skb; ret_orig: @@ -695,7 +659,7 @@ int nf_ct_frag6_init(void) nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; - nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4ff9af628e72..0ee553354ed5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -65,24 +65,6 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) -/* - * Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - int iif; - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags ip6_frags; int ip6_frag_nqueues(struct net *net) @@ -159,21 +141,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -/* Destruction primitives. */ - -static __inline__ void fq_put(struct frag_queue *fq) -{ - inet_frag_put(&fq->q, &ip6_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) -{ - inet_frag_kill(&fq->q, &ip6_frags); -} - static void ip6_evictor(struct net *net, struct inet6_dev *idev) { int evicted; @@ -183,22 +150,18 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev) IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); } -static void ip6_frag_expire(unsigned long data) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags) { - struct frag_queue *fq; struct net_device *dev = NULL; - struct net *net; - - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; - fq_kill(fq); + inet_frag_kill(&fq->q, frags); - net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) @@ -222,7 +185,19 @@ out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, frags); +} +EXPORT_SYMBOL(ip6_expire_frag_queue); + +static void ip6_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + + ip6_expire_frag_queue(net, fq, &ip6_frags); } static __inline__ struct frag_queue * @@ -391,7 +366,7 @@ found: return -1; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -417,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int nhoff; int sum_truesize; - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); /* Make the one we just received the head. */ if (prev) { @@ -586,7 +561,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &ip6_frags); return ret; } -- cgit v1.2.1 From d4915c087f7c2457c580efc16fe0bfa1a576274d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:10 +0000 Subject: ipv6: make ip6_frag_nqueues() and ip6_frag_mem() static inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0ee553354ed5..cf74f4e79356 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -67,16 +67,6 @@ struct ip6frag_skb_cb static struct inet_frags ip6_frags; -int ip6_frag_nqueues(struct net *net) -{ - return net->ipv6.frags.nqueues; -} - -int ip6_frag_mem(struct net *net) -{ - return atomic_read(&net->ipv6.frags.mem); -} - static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); -- cgit v1.2.1 From 6b102865e7ba9ff1e3c49c32c7187bb427d91798 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:11 +0000 Subject: ipv6: unify fragment thresh handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 9 +++++++-- net/ipv4/ip_fragment.c | 5 ++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++----- net/ipv6/reassembly.c | 16 +++++----------- 4 files changed, 17 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 85190e69297b..4750d2b74d79 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) nf->low_thresh = 0; local_bh_disable(); - inet_frag_evictor(nf, f); + inet_frag_evictor(nf, f, true); local_bh_enable(); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f) +int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) { struct inet_frag_queue *q; int work, evicted = 0; + if (!force) { + if (atomic_read(&nf->mem) <= nf->high_thresh) + return 0; + } + work = atomic_read(&nf->mem) - nf->low_thresh; while (work > 0) { read_lock(&f->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fa6a12c51066..448e68546827 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -219,7 +219,7 @@ static void ip_evictor(struct net *net) { int evicted; - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); + evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); if (evicted) IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } @@ -684,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ - if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) - ip_evictor(net); + ip_evictor(net); /* Lookup (or create) queue header */ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54274c33a0b1..1af12fde08df 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -566,11 +566,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { - local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags); - local_bh_enable(); - } + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); + local_bh_enable(); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index cf74f4e79356..da8a4e301b1b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -131,15 +131,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -static void ip6_evictor(struct net *net, struct inet6_dev *idev) -{ - int evicted; - - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); - if (evicted) - IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); -} - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, struct inet_frags *frags) { @@ -515,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int evicted; IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); @@ -539,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) - ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); + if (evicted) + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS, evicted); fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); if (fq != NULL) { -- cgit v1.2.1 From 8ea853fd0b721f14eacff1a5b364fe3e60d2dd82 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 18 Sep 2012 16:53:21 +0000 Subject: net/core: fix comment in skb_try_coalesce It should be the skb which is not cloned Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe00d1208167..e33ebae519c8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3502,7 +3502,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (!skb_cloned(from)) skb_shinfo(from)->nr_frags = 0; - /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + /* if the skb is not cloned this does nothing + * since we set nr_frags to 0. + */ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) skb_frag_ref(from, i); -- cgit v1.2.1 From 2514ec86531481713036081e93a353bbd02e542b Mon Sep 17 00:00:00 2001 From: Sylvain Roger Rieunier Date: Thu, 20 Sep 2012 10:03:29 +0200 Subject: mac80211: fix IBSS auth TX debug message In the IBSS auth TX debug message the BSSID and DA address are reversed, fix that. Signed-off-by: Sylvain Roger Rieunier [reword commit message and make it fit 72 cols] Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 7c082517f0c7..5f3620f0bc0a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -278,7 +278,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (auth && !sdata->u.ibss.auth_frame_registrations) { ibss_dbg(sdata, "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); + sdata->vif.addr, addr, sdata->u.ibss.bssid); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } -- cgit v1.2.1 From bb68b64724a4fd6b93d83b39aeffa4aadb2562fc Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 18 Sep 2012 14:19:23 +0000 Subject: ipv4: Don't add TCP-code in inet_sock_destruct Signed-off-by: Christoph Paasch Acked-by: H.K. Jerry Chu Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 -- net/ipv4/tcp.c | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 845372b025f6..766c59658563 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -149,8 +149,6 @@ void inet_sock_destruct(struct sock *sk) pr_err("Attempt to release alive inet socket %p\n", sk); return; } - if (sk->sk_protocol == IPPROTO_TCP) - kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index df83d744e380..7b1e940393cf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2325,6 +2325,13 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); +void tcp_sock_destruct(struct sock *sk) +{ + inet_sock_destruct(sk); + + kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); +} + static inline bool tcp_can_repair_sock(const struct sock *sk) { return capable(CAP_NET_ADMIN) && -- cgit v1.2.1 From 4308fc58dceda40fac8a8d9c05b7cfba0a6bbed3 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 19 Sep 2012 15:46:06 +0100 Subject: tcp: Document use of undefined variable. Both tcp_timewait_state_process and tcp_check_req use the same basic construct of struct tcp_options received tmp_opt; tmp_opt.saw_tstamp = 0; then call tcp_parse_options However if they are fed a frame containing a TCP_SACK then tbe code behaviour is undefined because opt_rx->sack_ok is undefined data. This ought to be documented if it is intentional. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e965319d610b..5792577b5bc5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) * spinlock it. I do not want! Well, probability of misbehaviour * is ridiculously low and, seems, we could use some mb() tricks * to avoid misread sequence numbers, states etc. --ANK + * + * We don't need to initialize tmp_out.sack_ok as we don't use the results */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, @@ -522,6 +524,8 @@ EXPORT_SYMBOL(tcp_create_openreq_child); * * XXX (TFO) - The current impl contains a special check for ack * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? + * + * We don't need to initialize tmp_opt.sack_ok as we don't use the results */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, -- cgit v1.2.1 From bc26ccd8fc756749de95606d28314efd0ce5aec3 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 19 Sep 2012 09:40:00 +0000 Subject: tcp: restore rcv_wscale in a repair mode (v2) rcv_wscale is a symetric parameter with snd_wscale. Both this parameters are set on a connection handshake. Without this value a remote window size can not be interpreted correctly, because a value from a packet should be shifted on rcv_wscale. And one more thing is that wscale_ok should be set too. This patch doesn't break a backward compatibility. If someone uses it in a old scheme, a rcv window will be restored with the same bug (rcv_wscale = 0). v2: Save backward compatibility on big-endian system. Before the first two bytes were snd_wscale and the second two bytes were rcv_wscale. Now snd_wscale is opt_val & 0xFFFF and rcv_wscale >> 16. This approach is independent on byte ordering. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy CC: Pavel Emelyanov Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf9a8ab29459..5f6419341821 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2331,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, tp->rx_opt.mss_clamp = opt.opt_val; break; case TCPOPT_WINDOW: - if (opt.opt_val > 14) - return -EFBIG; + { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; + + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = opt.opt_val; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) -- cgit v1.2.1 From 4c87308bdea31a7b4828a51f6156e6f721a1fcc9 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:38 +0000 Subject: xfrm_user: fix info leak in copy_to_user_auth() copy_to_user_auth() fails to initialize the remainder of alg_name and therefore discloses up to 54 bytes of heap memory via netlink to userland. Use strncpy() instead of strcpy() to fill the trailing bytes of alg_name with null bytes. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d12b62547ad0..40dd50d6c4cc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -742,7 +742,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; -- cgit v1.2.1 From f778a636713a435d3a922c60b1622a91136560c1 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:39 +0000 Subject: xfrm_user: fix info leak in copy_to_user_state() The memory reserved to dump the xfrm state includes the padding bytes of struct xfrm_usersa_info added by the compiler for alignment (7 for amd64, 3 for i386). Add an explicit memset(0) before filling the buffer to avoid the info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 40dd50d6c4cc..d585459dc8bb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -689,6 +689,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); -- cgit v1.2.1 From 7b789836f434c87168eab067cfbed1ec4783dffd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:40 +0000 Subject: xfrm_user: fix info leak in copy_to_user_policy() The memory reserved to dump the xfrm policy includes multiple padding bytes added by the compiler for alignment (padding bytes in struct xfrm_selector and struct xfrm_userpolicy_info). Add an explicit memset(0) before filling the buffer to avoid the heap info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d585459dc8bb..84dd85ceeeea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1320,6 +1320,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); -- cgit v1.2.1 From 1f86840f897717f86d523a13e99a447e6a5d2fa5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:41 +0000 Subject: xfrm_user: fix info leak in copy_to_user_tmpl() The memory used for the template copy is a local stack variable. As struct xfrm_user_tmpl contains multiple holes added by the compiler for alignment, not initializing the memory will lead to leaking stack bytes to userland. Add an explicit memset(0) to avoid the info leak. Initial version of the patch by Brad Spengler. Cc: Brad Spengler Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 84dd85ceeeea..8024b3dea8c2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1425,6 +1425,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); -- cgit v1.2.1 From ecd7918745234e423dd87fcc0c077da557909720 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 20 Sep 2012 10:01:49 +0000 Subject: xfrm_user: ensure user supplied esn replay window is valid The current code fails to ensure that the netlink message actually contains as many bytes as the header indicates. If a user creates a new state or updates an existing one but does not supply the bytes for the whole ESN replay window, the kernel copies random heap bytes into the replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL netlink attribute. This leads to following issues: 1. The replay window has random bits set confusing the replay handling code later on. 2. A malicious user could use this flaw to leak up to ~3.5kB of heap memory when she has access to the XFRM netlink interface (requires CAP_NET_ADMIN). Known users of the ESN replay window are strongSwan and Steffen's iproute2 patch (). The latter uses the interface with a bitmap supplied while the former does not. strongSwan is therefore prone to run into issue 1. To fix both issues without breaking existing userland allow using the XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a fully specified one. For the former case we initialize the in-kernel bitmap with zero, for the latter we copy the user supplied bitmap. For state updates the full bitmap must be supplied. To prevent overflows in the bitmap length calculation the maximum size of bmp_len is limited to 128 by this patch -- resulting in a maximum replay window of 4096 packets. This should be sufficient for all real life scenarios (RFC 4303 recommends a default replay window size of 64). Cc: Steffen Klassert Cc: Martin Willi Cc: Ben Hutchings Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8024b3dea8c2..5927065e97cf 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; -- cgit v1.2.1 From e3ac104d41a97b42316915020ba228c505447d21 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:43 +0000 Subject: xfrm_user: don't copy esn replay window twice for new states The ESN replay window was already fully initialized in xfrm_alloc_replay_state_esn(). No need to copy it again. Cc: Steffen Klassert Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5927065e97cf..289f4bf18ff0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -461,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -574,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -1848,7 +1849,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; -- cgit v1.2.1 From c0d680e577ff171e7b37dbdb1b1bf5451e851f04 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 19 Sep 2012 15:49:00 +0000 Subject: net: do not disable sg for packets requiring no checksum A change in a series of VLAN-related changes appears to have inadvertently disabled the use of the scatter gather feature of network cards for transmission of non-IP ethernet protocols like ATA over Ethernet (AoE). Below is a reference to the commit that introduces a "harmonize_features" function that turns off scatter gather when the NIC does not support hardware checksumming for the ethernet protocol of an sk buff. commit f01a5236bd4b140198fbcc550f085e8361fd73fa Author: Jesse Gross Date: Sun Jan 9 06:23:31 2011 +0000 net offloading: Generalize netif_get_vlan_features(). The can_checksum_protocol function is not equipped to consider a protocol that does not require checksumming. Calling it for a protocol that requires no checksum is inappropriate. The patch below has harmonize_features call can_checksum_protocol when the protocol needs a checksum, so that the network layer is not forced to perform unnecessary skb linearization on the transmission of AoE packets. Unnecessary linearization results in decreased performance and increased memory pressure, as reported here: http://www.spinics.net/lists/linux-mm/msg15184.html The problem has probably not been widely experienced yet, because only recently has the kernel.org-distributed aoe driver acquired the ability to use payloads of over a page in size, with the patchset recently included in the mm tree: https://lkml.org/lkml/2012/8/28/140 The coraid.com-distributed aoe driver already could use payloads of greater than a page in size, but its users generally do not use the newest kernels. Signed-off-by: Ed Cashin Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ac7609d85187..89e33a5d4d93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2134,7 +2134,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.2.1 From b0041d1b8e73d419f4165c189af7c28aff3a02ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Sep 2012 21:03:39 +0200 Subject: netfilter: fix IPv6 NAT dependencies in Kconfig * NF_NAT_IPV6 requires IP6_NF_IPTABLES * IP6_NF_TARGET_MASQUERADE, IP6_NF_TARGET_NETMAP, IP6_NF_TARGET_REDIRECT and IP6_NF_TARGET_NPT require NF_NAT_IPV6. This change just mirrors what IPv4 does in Kconfig, for consistency. Reported-by: Randy Dunlap Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 110 ++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 3b73254d7bf1..d8f276b9fd8a 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,18 +25,6 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_IPV6 - tristate "IPv6 NAT" - depends on NF_CONNTRACK_IPV6 - depends on NETFILTER_ADVANCED - select NF_NAT - help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. It is controlled by - the `nat' table in ip6tables, see the man page for ip6tables(8). - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -144,48 +132,6 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. -config IP6_NF_TARGET_MASQUERADE - tristate "MASQUERADE target support" - depends on NF_NAT_IPV6 - help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_NETMAP - tristate "NETMAP target support" - depends on NF_NAT_IPV6 - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_REDIRECT - tristate "REDIRECT target support" - depends on NF_NAT_IPV6 - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_NPT - tristate "NPT (Network Prefix translation) target support" - depends on NETFILTER_ADVANCED - help - This option adds the `SNPT' and `DNPT' target, which perform - stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n @@ -235,9 +181,63 @@ config IP6_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + +if NF_NAT_IPV6 + +config IP6_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_NETMAP + tristate "NETMAP target support" + help + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + help + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_NPT + tristate "NPT (Network Prefix translation) target support" + help + This option adds the `SNPT' and `DNPT' target, which perform + stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. + + To compile it as a module, choose M here. If unsure, say N. + +endif # NF_NAT_IPV6 + endif # IP6_NF_IPTABLES endmenu -- cgit v1.2.1 From b0cdb1d9a9522b4f0905f11e4c7d7a59e0f7dc44 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 19 Sep 2012 20:57:04 +0000 Subject: netfilter: nf_nat: fix oops when unloading protocol modules When unloading a protocol module nf_ct_iterate_cleanup() is used to remove all conntracks using the protocol from the bysource hash and clean their NAT sections. Since the conntrack isn't actually killed, the NAT callback is invoked twice, once for each direction, which causes an oops when trying to delete it from the bysource hash for the second time. The same oops can also happen when removing both an L3 and L4 protocol since the cleanup function doesn't check whether the conntrack has already been cleaned up. Pid: 4052, comm: modprobe Not tainted 3.6.0-rc3-test-nat-unload-fix+ #32 Red Hat KVM RIP: 0010:[] [] nf_nat_proto_clean+0x73/0xd0 [nf_nat] RSP: 0018:ffff88007808fe18 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800728550c0 RCX: ffff8800756288b0 RDX: dead000000200200 RSI: ffff88007808fe88 RDI: ffffffffa002f208 RBP: ffff88007808fe28 R08: ffff88007808e000 R09: 0000000000000000 R10: dead000000200200 R11: dead000000100100 R12: ffffffff81c6dc00 R13: ffff8800787582b8 R14: ffff880078758278 R15: ffff88007808fe88 FS: 00007f515985d700(0000) GS:ffff88007cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f515986a000 CR3: 000000007867a000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 4052, threadinfo ffff88007808e000, task ffff8800756288b0) Stack: ffff88007808fe68 ffffffffa002c290 ffff88007808fe78 ffffffff815614e3 ffffffff00000000 00000aeb00000246 ffff88007808fe68 ffffffff81c6dc00 ffff88007808fe88 ffffffffa00358a0 0000000000000000 000000000040f5b0 Call Trace: [] ? nf_nat_net_exit+0x50/0x50 [nf_nat] [] nf_ct_iterate_cleanup+0xc3/0x170 [] nf_nat_l3proto_unregister+0x8a/0x100 [nf_nat] [] ? compat_prepare_timeout+0x13/0xb0 [] nf_nat_l3proto_ipv4_exit+0x10/0x23 [nf_nat_ipv4] ... To fix this, - check whether the conntrack has already been cleaned up in nf_nat_proto_clean - change nf_ct_iterate_cleanup() to only invoke the callback function once for each conntrack (IP_CT_DIR_ORIGINAL). The second change doesn't affect other callers since when conntracks are actually killed, both directions are removed from the hash immediately and the callback is already only invoked once. If it is not killed, the second callback invocation will always return the same decision not to kill it. Reported-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy Acked-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 ++ net/netfilter/nf_nat_core.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dcb27910ab3c..0f241be28f9e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1224,6 +1224,8 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 1816ad381485..65cf694bb8eb 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -481,6 +481,8 @@ static int nf_nat_proto_clean(struct nf_conn *i, void *data) if (!nat) return 0; + if (!(i->status & IPS_SRC_NAT_DONE)) + return 0; if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; -- cgit v1.2.1 From 136251d02ff283e99f023b0abdeb52b4b3423a56 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Thu, 20 Sep 2012 03:52:04 +0000 Subject: netfilter: nf_nat: remove obsolete rcu_read_unlock call hlist walk in find_appropriate_src() is not protected anymore by rcu_read_lock(), so rcu_read_unlock() is unnecessary if in_range() matches. This bug was added in (c7232c9 netfilter: add protocol independent NAT core). Signed-off-by: Ulrich Weber Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 65cf694bb8eb..5f2f9109f461 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -201,10 +201,8 @@ find_appropriate_src(struct net *net, u16 zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(l3proto, l4proto, result, range)) { - rcu_read_unlock(); + if (in_range(l3proto, l4proto, result, range)) return 1; - } } } return 0; -- cgit v1.2.1 From b3d54b3e406b5d6ac391590bf7524e887e8e13c3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 11:37:59 +0200 Subject: netfilter: combine ipt_NETMAP and ip6t_NETMAP Combine more modules since the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. IP_NF_TARGET_NETMAP becomes a compat option; IP6_NF_TARGET_NETMAP is completely eliminated since it has not see a release yet. Signed-off-by: Jan Engelhardt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 11 ++- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_NETMAP.c | 101 ------------------------ net/ipv6/netfilter/Kconfig | 9 --- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_NETMAP.c | 94 ---------------------- net/netfilter/Kconfig | 10 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_NETMAP.c | 165 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 181 insertions(+), 212 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_NETMAP.c delete mode 100644 net/ipv6/netfilter/ip6t_NETMAP.c create mode 100644 net/netfilter/xt_NETMAP.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 131e53702e77..6f140084004f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -172,12 +172,11 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_NETMAP tristate "NETMAP target support" depends on NETFILTER_ADVANCED - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_NETMAP + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b7dd18987237..f4446c5d6595 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c deleted file mode 100644 index 85028dc0425d..000000000000 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ /dev/null @@ -1,101 +0,0 @@ -/* NETMAP - static NAT mapping of IP network addresses (1:1). - * The mapping can be applied to source (POSTROUTING), - * destination (PREROUTING), or both (with separate rules). - */ - -/* (C) 2000-2001 Svenning Soerensen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Svenning Soerensen "); -MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); - -static int netmap_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 new_ip, netmask; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); - ct = nf_ct_get(skb, &ctinfo); - - netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) - new_ip = ip_hdr(skb)->daddr & ~netmask; - else - new_ip = ip_hdr(skb)->saddr & ~netmask; - new_ip |= mr->range[0].min_ip & netmask; - - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = new_ip; - newrange.max_addr.ip = new_ip; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); -} - -static struct xt_target netmap_tg_reg __read_mostly = { - .name = "NETMAP", - .family = NFPROTO_IPV4, - .target = netmap_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .checkentry = netmap_tg_check, - .me = THIS_MODULE -}; - -static int __init netmap_tg_init(void) -{ - return xt_register_target(&netmap_tg_reg); -} - -static void __exit netmap_tg_exit(void) -{ - xt_unregister_target(&netmap_tg_reg); -} - -module_init(netmap_tg_init); -module_exit(netmap_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d8f276b9fd8a..007bb450f04f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,15 +209,6 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_NETMAP - tristate "NETMAP target support" - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_TARGET_REDIRECT tristate "REDIRECT target support" help diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 5752132ca159..de8e0d11338d 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,7 +35,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o -obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NETMAP.c b/net/ipv6/netfilter/ip6t_NETMAP.c deleted file mode 100644 index 4f3bf360e50f..000000000000 --- a/net/ipv6/netfilter/ip6t_NETMAP.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Svenning Soerensen's IPv4 NETMAP target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include - -static unsigned int -netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - union nf_inet_addr new_addr, netmask; - unsigned int i; - - ct = nf_ct_get(skb, &ctinfo); - for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) - netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ - range->max_addr.ip6[i]); - - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) - new_addr.in6 = ipv6_hdr(skb)->daddr; - else - new_addr.in6 = ipv6_hdr(skb)->saddr; - - for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { - new_addr.ip6[i] &= ~netmask.ip6[i]; - new_addr.ip6[i] |= range->min_addr.ip6[i] & - netmask.ip6[i]; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr = new_addr; - newrange.max_addr = new_addr; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); -} - -static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - - if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) - return -EINVAL; - return 0; -} - -static struct xt_target netmap_tg6_reg __read_mostly = { - .name = "NETMAP", - .family = NFPROTO_IPV6, - .target = netmap_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .checkentry = netmap_tg6_checkentry, - .me = THIS_MODULE, -}; - -static int __init netmap_tg6_init(void) -{ - return xt_register_target(&netmap_tg6_reg); -} - -static void netmap_tg6_exit(void) -{ - xt_unregister_target(&netmap_tg6_reg); -} - -module_init(netmap_tg6_init); -module_exit(netmap_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv6 subnets"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3f4b3b4a7762..ad0e0da62ede 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -648,6 +648,16 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_TARGET_NETMAP + tristate '"NETMAP" target support' + depends on NF_NAT + ---help--- + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0baa3f104fcb..600d28ba514c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c new file mode 100644 index 000000000000..b253e07cb1c5 --- /dev/null +++ b/net/netfilter/xt_NETMAP.c @@ -0,0 +1,165 @@ +/* + * (C) 2000-2001 Svenning Soerensen + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + union nf_inet_addr new_addr, netmask; + unsigned int i; + + ct = nf_ct_get(skb, &ctinfo); + for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) + netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ + range->max_addr.ip6[i]); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_addr.in6 = ipv6_hdr(skb)->daddr; + else + new_addr.in6 = ipv6_hdr(skb)->saddr; + + for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { + new_addr.ip6[i] &= ~netmask.ip6[i]; + new_addr.ip6[i] |= range->min_addr.ip6[i] & + netmask.ip6[i]; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = new_addr; + newrange.max_addr = new_addr; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return -EINVAL; + return 0; +} + +static unsigned int +netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 new_ip, netmask; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); + ct = nf_ct_get(skb, &ctinfo); + + netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_ip = ip_hdr(skb)->daddr & ~netmask; + else + new_ip = ip_hdr(skb)->saddr & ~netmask; + new_ip |= mr->range[0].min_ip & netmask; + + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = new_ip; + newrange.max_addr.ip = new_ip; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static struct xt_target netmap_tg_reg[] __read_mostly = { + { + .name = "NETMAP", + .family = NFPROTO_IPV6, + .revision = 0, + .target = netmap_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg6_checkentry, + .me = THIS_MODULE, + }, + { + .name = "NETMAP", + .family = NFPROTO_IPV4, + .revision = 0, + .target = netmap_tg4, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg4_check, + .me = THIS_MODULE, + }, +}; + +static int __init netmap_tg_init(void) +{ + return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +static void netmap_tg_exit(void) +{ + xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +module_init(netmap_tg_init); +module_exit(netmap_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS("ip6t_NETMAP"); +MODULE_ALIAS("ipt_NETMAP"); -- cgit v1.2.1 From 2cbc78a29e76a2e92c172651204f3117491877d2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 11:41:34 +0200 Subject: netfilter: combine ipt_REDIRECT and ip6t_REDIRECT Combine more modules since the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. IP_NF_TARGET_REDIRECT becomes a compat option; IP6_NF_TARGET_REDIRECT is completely eliminated since it has not see a release yet. Signed-off-by: Jan Engelhardt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 12 +-- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_REDIRECT.c | 113 ---------------------- net/ipv6/netfilter/Kconfig | 10 -- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_REDIRECT.c | 98 ------------------- net/netfilter/Kconfig | 11 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_REDIRECT.c | 190 +++++++++++++++++++++++++++++++++++++ 9 files changed, 207 insertions(+), 230 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_REDIRECT.c delete mode 100644 net/ipv6/netfilter/ip6t_REDIRECT.c create mode 100644 net/netfilter/xt_REDIRECT.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 6f140084004f..d8d6f2a5bf12 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -181,13 +181,11 @@ config IP_NF_TARGET_NETMAP config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" depends on NETFILTER_ADVANCED - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_REDIRECT + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. endif diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index f4446c5d6595..007b128eecc9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c deleted file mode 100644 index 11407d7d2472..000000000000 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ /dev/null @@ -1,113 +0,0 @@ -/* Redirect. Simple mapping which alters dst to a local IP address. */ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team "); -MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); - -/* FIXME: Take multiple ranges --RR */ -static int redirect_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} - -static struct xt_target redirect_tg_reg __read_mostly = { - .name = "REDIRECT", - .family = NFPROTO_IPV4, - .target = redirect_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = redirect_tg_check, - .me = THIS_MODULE, -}; - -static int __init redirect_tg_init(void) -{ - return xt_register_target(&redirect_tg_reg); -} - -static void __exit redirect_tg_exit(void) -{ - xt_unregister_target(&redirect_tg_reg); -} - -module_init(redirect_tg_init); -module_exit(redirect_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 007bb450f04f..c72532a60d88 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,16 +209,6 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_REDIRECT - tristate "REDIRECT target support" - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" help diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index de8e0d11338d..2d11fcc2cf3c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,5 +36,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o -obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_REDIRECT.c b/net/ipv6/netfilter/ip6t_REDIRECT.c deleted file mode 100644 index 60497a3c6004..000000000000 --- a/net/ipv6/netfilter/ip6t_REDIRECT.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; - -static unsigned int -redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = loopback_addr; - else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} - -static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - - if (range->flags & NF_NAT_RANGE_MAP_IPS) - return -EINVAL; - return 0; -} - -static struct xt_target redirect_tg6_reg __read_mostly = { - .name = "REDIRECT", - .family = NFPROTO_IPV6, - .checkentry = redirect_tg6_checkentry, - .target = redirect_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .me = THIS_MODULE, -}; - -static int __init redirect_tg6_init(void) -{ - return xt_register_target(&redirect_tg6_reg); -} - -static void __exit redirect_tg6_exit(void) -{ - xt_unregister_target(&redirect_tg6_reg); -} - -module_init(redirect_tg6_init); -module_exit(redirect_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ad0e0da62ede..fefa514b9917 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -690,6 +690,17 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NF_NAT + ---help--- + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 600d28ba514c..32596978df1d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o +obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c new file mode 100644 index 000000000000..22a10309297c --- /dev/null +++ b/net/netfilter/xt_REDIRECT.c @@ -0,0 +1,190 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +static unsigned int +redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = loopback_addr; + else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +/* FIXME: Take multiple ranges --RR */ +static int redirect_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static unsigned int +redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 newdst; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = htonl(0x7F000001); + else { + struct in_device *indev; + struct in_ifaddr *ifa; + + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + if (indev && (ifa = indev->ifa_list)) + newdst = ifa->ifa_local; + rcu_read_unlock(); + + if (!newdst) + return NF_DROP; + } + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static struct xt_target redirect_tg_reg[] __read_mostly = { + { + .name = "REDIRECT", + .family = NFPROTO_IPV6, + .revision = 0, + .table = "nat", + .checkentry = redirect_tg6_checkentry, + .target = redirect_tg6, + .targetsize = sizeof(struct nf_nat_range), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "REDIRECT", + .family = NFPROTO_IPV4, + .revision = 0, + .table = "nat", + .target = redirect_tg4, + .checkentry = redirect_tg4_check, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init redirect_tg_init(void) +{ + return xt_register_targets(redirect_tg_reg, + ARRAY_SIZE(redirect_tg_reg)); +} + +static void __exit redirect_tg_exit(void) +{ + xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg)); +} + +module_init(redirect_tg_init); +module_exit(redirect_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); +MODULE_ALIAS("ip6t_REDIRECT"); +MODULE_ALIAS("ipt_REDIRECT"); -- cgit v1.2.1 From 7f1611469b67739df260a6487b2a5e199e8eeba1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2012 12:53:34 +0200 Subject: mac80211: don't send delBA before disassoc When we disassociate, it's not really useful to send delBA action frames since we're going to send disassoc/deauth anyway, so change that. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2dbd9e1e3583..e714ed8bb198 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1399,7 +1399,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, ifmgd->bssid); if (sta) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, tx); + ieee80211_sta_tear_down_BA_sessions(sta, false); } mutex_unlock(&local->sta_mtx); -- cgit v1.2.1 From 582bb505b67847600ee27e4cda108bb99a8b6306 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2012 12:54:46 +0200 Subject: mac80211: don't send delBA when removing stations When a station is removed and we stop the aggregation sessions, it's not useful to send delBA since this is due to us or the station disassociating or dropping the connection in some other way, so change that. Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 9c8cd8b8f753..797dd36a220d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -738,7 +738,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions(sta, false); ret = sta_info_hash_del(local, sta); if (ret) -- cgit v1.2.1 From c6f219dc83fbb30c8426fcc7850c28d9d33dee44 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2012 13:04:32 +0200 Subject: mac80211: don't send delBA on addBA failure There's no reason to send a delBA when the peer refused our addBA, so change that. Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d0deb3edae21..3195a6307f50 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -869,7 +869,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } else { ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - true); + false); } out: -- cgit v1.2.1 From fc18162520b642a46ea556300bfdee80810fc538 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 20 Sep 2012 20:36:06 +0000 Subject: l2tp: fix compile error when CONFIG_IPV6=m and CONFIG_L2TP=y When CONFIG_IPV6=m and CONFIG_L2TP=y, I got the following compile error: LD init/built-in.o net/built-in.o: In function `l2tp_xmit_core': l2tp_core.c:(.text+0x147781): undefined reference to `inet6_csk_xmit' net/built-in.o: In function `l2tp_tunnel_create': (.text+0x149067): undefined reference to `udpv6_encap_enable' net/built-in.o: In function `l2tp_ip6_recvmsg': l2tp_ip6.c:(.text+0x14e991): undefined reference to `ipv6_recv_error' net/built-in.o: In function `l2tp_ip6_sendmsg': l2tp_ip6.c:(.text+0x14ec64): undefined reference to `fl6_sock_lookup' l2tp_ip6.c:(.text+0x14ed6b): undefined reference to `datagram_send_ctl' l2tp_ip6.c:(.text+0x14eda0): undefined reference to `fl6_sock_lookup' l2tp_ip6.c:(.text+0x14ede5): undefined reference to `fl6_merge_options' l2tp_ip6.c:(.text+0x14edf4): undefined reference to `ipv6_fixup_options' l2tp_ip6.c:(.text+0x14ee5d): undefined reference to `fl6_update_dst' l2tp_ip6.c:(.text+0x14eea3): undefined reference to `ip6_dst_lookup_flow' l2tp_ip6.c:(.text+0x14eee7): undefined reference to `ip6_dst_hoplimit' l2tp_ip6.c:(.text+0x14ef8b): undefined reference to `ip6_append_data' l2tp_ip6.c:(.text+0x14ef9d): undefined reference to `ip6_flush_pending_frames' l2tp_ip6.c:(.text+0x14efe2): undefined reference to `ip6_push_pending_frames' net/built-in.o: In function `l2tp_ip6_destroy_sock': l2tp_ip6.c:(.text+0x14f090): undefined reference to `ip6_flush_pending_frames' l2tp_ip6.c:(.text+0x14f0a0): undefined reference to `inet6_destroy_sock' net/built-in.o: In function `l2tp_ip6_connect': l2tp_ip6.c:(.text+0x14f14d): undefined reference to `ip6_datagram_connect' net/built-in.o: In function `l2tp_ip6_bind': l2tp_ip6.c:(.text+0x14f4fe): undefined reference to `ipv6_chk_addr' net/built-in.o: In function `l2tp_ip6_init': l2tp_ip6.c:(.init.text+0x73fa): undefined reference to `inet6_add_protocol' l2tp_ip6.c:(.init.text+0x740c): undefined reference to `inet6_register_protosw' net/built-in.o: In function `l2tp_ip6_exit': l2tp_ip6.c:(.exit.text+0x1954): undefined reference to `inet6_unregister_protosw' l2tp_ip6.c:(.exit.text+0x1965): undefined reference to `inet6_del_protocol' net/built-in.o:(.rodata+0xf2d0): undefined reference to `inet6_release' net/built-in.o:(.rodata+0xf2d8): undefined reference to `inet6_bind' net/built-in.o:(.rodata+0xf308): undefined reference to `inet6_ioctl' net/built-in.o:(.data+0x1af40): undefined reference to `ipv6_setsockopt' net/built-in.o:(.data+0x1af48): undefined reference to `ipv6_getsockopt' net/built-in.o:(.data+0x1af50): undefined reference to `compat_ipv6_setsockopt' net/built-in.o:(.data+0x1af58): undefined reference to `compat_ipv6_getsockopt' make: *** [vmlinux] Error 1 This is due to l2tp uses symbols from IPV6, so when IPV6 is a module, l2tp is not allowed to be builtin. Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/l2tp/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 4b1e71751e10..147a8fd47a17 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -4,6 +4,7 @@ menuconfig L2TP tristate "Layer Two Tunneling Protocol (L2TP)" + depends on (IPV6 || IPV6=n) depends on INET ---help--- Layer Two Tunneling Protocol -- cgit v1.2.1 From f950c0ecc78f745e490d615280e031de4dbb1306 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 20 Sep 2012 18:29:56 +0000 Subject: ipv6: fix return value check in fib6_add() In case of error, the function fib6_add_1() returns ERR_PTR() or NULL pointer. The ERR_PTR() case check is missing in fib6_add(). dpatch engine is used to generated this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 13690d650c3e..286acfc21250 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -819,6 +819,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) offsetof(struct rt6_info, rt6i_src), allow_create, replace_required); + if (IS_ERR(sn)) { + err = PTR_ERR(sn); + sn = NULL; + } if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node -- cgit v1.2.1 From bf5b30b8a4416de04f1ac1196281ddb318669464 Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Thu, 20 Sep 2012 22:37:25 +0000 Subject: net: change return values from -EACCES to -EPERM Change return value from -EACCES to -EPERM when the permission check fails. Signed-off-by: Zhao Hongjiang Signed-off-by: David S. Miller --- net/bluetooth/bnep/sock.c | 4 ++-- net/bluetooth/cmtp/sock.c | 4 ++-- net/bluetooth/hci_sock.c | 16 ++++++++-------- net/bluetooth/hidp/sock.c | 4 ++-- net/ipv4/devinet.c | 4 ++-- net/netrom/af_netrom.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b410e0b..1eaacf10d19d 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -58,7 +58,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -84,7 +84,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d14571..32dc83dcb6b2 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -72,7 +72,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case CMTPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case CMTPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 19fdac78e555..d5ace1eda3ed 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -490,7 +490,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return -EPERM; @@ -510,12 +510,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_add(hdev, (void __user *) arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *) arg); default: @@ -546,22 +546,22 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: @@ -573,7 +573,7 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f6892a36..b24fb3bd8625 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -56,7 +56,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case HIDPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -91,7 +91,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case HIDPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9b55b6f5a585..e12fad773852 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCSIFFLAGS: - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; break; @@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; ret = -EINVAL; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1b9024ee963c..7261eb81974f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -601,7 +601,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); release_sock(sk); - return -EACCES; + return -EPERM; } nr->user_addr = addr->fsa_digipeater[0]; nr->source_addr = addr->fsa_ax25.sax25_call; -- cgit v1.2.1 From 5e953778a2aab04929a5e7b69f53dc26e39b079e Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Fri, 21 Sep 2012 08:31:19 +0000 Subject: ipconfig: add nameserver IPs to kernel-parameter ip= On small systems (e.g. embedded ones) IP addresses are often configured by bootloaders and get assigned to kernel via parameter "ip=". If set to "ip=dhcp", even nameserver entries from DHCP daemons are handled. These entries exported in /proc/net/pnp are commonly linked by /etc/resolv.conf. To configure nameservers for networks without DHCP, this patch adds option and to kernel-parameter 'ip='. Signed-off-by: Christoph Fritz Tested-by: Jan Weitzel Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67e8a6b086ea..1c0e7e051044 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -743,14 +743,22 @@ static void __init ic_bootp_init_ext(u8 *e) /* - * Initialize the DHCP/BOOTP mechanism. + * Predefine Nameservers */ -static inline void __init ic_bootp_init(void) +static inline void __init ic_nameservers_predef(void) { int i; for (i = 0; i < CONF_NAMESERVERS_MAX; i++) ic_nameservers[i] = NONE; +} + +/* + * Initialize the DHCP/BOOTP mechanism. + */ +static inline void __init ic_bootp_init(void) +{ + ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); } @@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; + unsigned int i; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - pr_cont("\n"); + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) { + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + break; + } + for (i++; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) + pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); #endif /* !SILENT */ return 0; @@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + ic_nameservers_predef(); + /* Parse string for static IP assignment. */ ip = addrs; while (ip && *ip) { @@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs) ic_enable = 0; } break; + case 7: + if (CONF_NAMESERVERS_MAX >= 1) { + ic_nameservers[0] = in_aton(ip); + if (ic_nameservers[0] == ANY) + ic_nameservers[0] = NONE; + } + break; + case 8: + if (CONF_NAMESERVERS_MAX >= 2) { + ic_nameservers[1] = in_aton(ip); + if (ic_nameservers[1] == ANY) + ic_nameservers[1] = NONE; + } + break; } } ip = cp; -- cgit v1.2.1 From 6e27c9b4ee8f348770be5751e6a845ff52a31e19 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 21 Sep 2012 21:44:58 +0200 Subject: netfilter: ipset: Fix sparse warnings "incorrect type in assignment" Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ip.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipport.c | 13 +++++++------ net/netfilter/ipset/ip_set_hash_ipportip.c | 13 +++++++------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 21 ++++++++++++--------- net/netfilter/ipset/ip_set_hash_net.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netiface.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netport.c | 13 +++++++------ 7 files changed, 39 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index a68dbd4f1e4e..42bccd58cbc6 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -114,7 +114,7 @@ nla_put_failure: static inline void hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -188,7 +188,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip += hosts) { nip = htonl(ip); if (nip == 0) diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 92722bb82eea..e0ce0daefac1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -130,8 +130,8 @@ static inline void hash_ipport4_data_next(struct ip_set_hash *h, const struct hash_ipport4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -231,9 +231,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); @@ -349,7 +350,7 @@ static inline void hash_ipport6_data_next(struct ip_set_hash *h, const struct hash_ipport6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -431,7 +432,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 0637ce096def..c864bf40e6be 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -133,8 +133,8 @@ static inline void hash_ipportip4_data_next(struct ip_set_hash *h, const struct hash_ipportip4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -239,9 +239,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); @@ -362,7 +363,7 @@ static inline void hash_ipportip6_data_next(struct ip_set_hash *h, const struct hash_ipportip6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -449,7 +450,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 1ce21ca976e1..2c704bb3cff1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -173,9 +173,9 @@ static inline void hash_ipportnet4_data_next(struct ip_set_hash *h, const struct hash_ipportnet4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); - h->next.ip2 = ntohl(d->ip2); + h->next.ip = d->ip; + h->next.port = d->port; + h->next.ip2 = d->ip2; } static int @@ -314,14 +314,17 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { data.ip = htonl(ip); - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.port = htons(p); - ip2 = retried && ip == h->next.ip && p == h->next.port - ? h->next.ip2 : ip2_from; + ip2 = retried + && ip == ntohl(h->next.ip) + && p == ntohs(h->next.port) + ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { data.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, @@ -486,7 +489,7 @@ static inline void hash_ipportnet6_data_next(struct ip_set_hash *h, const struct hash_ipportnet6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -598,7 +601,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c57a6a09906d..d676093822b1 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -152,7 +152,7 @@ static inline void hash_net4_data_next(struct ip_set_hash *h, const struct hash_net4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -235,7 +235,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_HASH_RANGE; } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index d5d3607ae7bc..632693f0beeb 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -277,7 +277,7 @@ static inline void hash_netiface4_data_next(struct ip_set_hash *h, const struct hash_netiface4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -409,7 +409,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index fc3143a2d41b..3ec27fccddd7 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -171,8 +171,8 @@ static inline void hash_netport4_data_next(struct ip_set_hash *h, const struct hash_netport4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -289,12 +289,13 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); data.cidr = cidr - 1; - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.port = htons(p); ret = adtfn(set, &data, timeout, flags); @@ -450,7 +451,7 @@ static inline void hash_netport6_data_next(struct ip_set_hash *h, const struct hash_netport6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -554,7 +555,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); -- cgit v1.2.1 From b9fed748185a96b7cfe74afac4bd228e8af16f01 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 4 Sep 2012 17:45:59 +0200 Subject: netfilter: ipset: Check and reject crazy /0 input parameters bitmap:ip and bitmap:ip,mac type did not reject such a crazy range when created and using such a set results in a kernel crash. The hash types just silently ignored such parameters. Reject invalid /0 input parameters explicitely. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 ++++++---- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 5 +++-- net/netfilter/ipset/ip_set_hash_ip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 7e1b061aeeba..02184b5ef9e1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -284,7 +284,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -454,7 +454,8 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, elements; + u32 first_ip, last_ip, hosts; + u64 elements; u8 netmask = 32; int ret; @@ -497,7 +498,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (netmask == 32) { hosts = 1; - elements = last_ip - first_ip + 1; + elements = (u64)last_ip - first_ip + 1; } else { u8 mask_bits; u32 mask; @@ -515,7 +516,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; - pr_debug("hosts %u, elements %u\n", hosts, elements); + pr_debug("hosts %u, elements %llu\n", + hosts, (unsigned long long)elements); map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index d7eaf10edb6d..6819d3cff919 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -557,7 +557,8 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, elements; + u32 first_ip, last_ip; + u64 elements; struct bitmap_ipmac *map; int ret; @@ -588,7 +589,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } else return -IPSET_ERR_PROTOCOL; - elements = last_ip - first_ip + 1; + elements = (u64)last_ip - first_ip + 1; if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 42bccd58cbc6..bc8f76edc260 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -179,7 +179,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e0ce0daefac1..6760fd4d7858 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -217,7 +217,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index c864bf40e6be..ac09bec274f1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -225,7 +225,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2c704bb3cff1..242814e4db4e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -290,7 +290,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } -- cgit v1.2.1 From 5ce765a540f34d1e2005e1210f49f67fdf11e997 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 21 Sep 2012 17:59:58 -0500 Subject: libceph: only kunmap kmapped pages In write_partial_msg_pages(), pages need to be kmapped in order to perform a CRC-32c calculation on them. As an artifact of the way this code used to be structured, the kunmap() call was separated from the kmap() call and both were done conditionally. But the conditions under which the kmap() and kunmap() calls were made differed, so there was a chance a kunmap() call would be done on a page that had not been mapped. The symptom of this was tripping a BUG() in kunmap_high() when pkmap_count[nr] became 0. Reported-by: Bryan K. Wright Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 24c5eea8c45b..159aa8bef9e7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1073,16 +1073,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; crc = crc32c(crc, base, len); + kunmap(page); msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, con->out_msg_pos.page_pos + bio_offset, len, 1); - - if (do_datacrc) - kunmap(page); - if (ret <= 0) goto out; -- cgit v1.2.1 From ab43ed8b7490cb387782423ecf74aeee7237e591 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 Sep 2012 00:08:29 +0000 Subject: ipv4: raw: fix icmp_filter() icmp_filter() should not modify its input, or else its caller would need to recompute ip_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969ea..d23c6571ba1c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ -- cgit v1.2.1 From 623df484a777f3c00c1ea3d6a7565b8d8ac688a1 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:54 +0000 Subject: tcp: extract code to compute SYNACK RTT In preparation for adding another spot where we compute the SYNACK RTT, extract this code so that it can be shared. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 +--- net/ipv6/tcp_ipv6.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e64abed249cc..1e66f7fb4fe6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1733,9 +1733,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f3bfb8bbfdec..cfeeeb7fcf54 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1348,9 +1348,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; -- cgit v1.2.1 From 016818d076871c4ee34db1e8d74dc17ac1de626a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:55 +0000 Subject: tcp: TCP Fast Open Server - take SYNACK RTT after completing 3WHS When taking SYNACK RTT samples for servers using TCP Fast Open, fix the code to ensure that we only call tcp_valid_rtt_meas() after we receive the ACK that completes the 3-way handshake. Previously we were always taking an RTT sample in tcp_v4_syn_recv_sock(). However, for TCP Fast Open connections tcp_v4_conn_req_fastopen() calls tcp_v4_syn_recv_sock() at the time we receive the SYN. So for TFO we must wait until tcp_rcv_state_process() to take the RTT sample. To fix this, we wait until after TFO calls tcp_v4_syn_recv_sock() before we set the snt_synack timestamp, since tcp_synack_rtt_meas() already ensures that we only take a SYNACK RTT sample if snt_synack is non-zero. To be careful, we only take a snt_synack timestamp when a SYNACK transmit or retransmit succeeds. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 12 +++++++++--- net/ipv6/tcp_ipv6.c | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e2bec815ff23..bb326684495b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5983,6 +5983,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * need req so release it. */ if (req) { + tcp_synack_rtt_meas(sk, req); reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1e66f7fb4fe6..0a7e020f16b5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -868,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->rmt_addr, ireq->opt); err = net_xmit_eval(err); + if (!tcp_rsk(req)->snt_synack && !err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; @@ -1382,6 +1384,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const struct inet_request_sock *ireq = inet_rsk(req); struct sock *child; + int err; req->retrans = 0; req->sk = NULL; @@ -1393,8 +1396,11 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, kfree_skb(skb_synack); return -1; } - ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (!err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; /* XXX (TFO) - is it ok to ignore error and continue? */ spin_lock(&queue->fastopenq->lock); @@ -1612,7 +1618,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; if (dst == NULL) { dst = inet_csk_route_req(sk, &fl4, req); @@ -1650,6 +1655,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (err || want_cookie) goto drop_and_free; + tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cfeeeb7fcf54..d6212d6bc8d8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) } have_isn: tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; @@ -1180,6 +1179,7 @@ have_isn: want_cookie) goto drop_and_free; + tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; -- cgit v1.2.1 From 072539880156fafd86c753070e598c23e8e74586 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:56 +0000 Subject: tcp: TCP Fast Open Server - note timestamps and retransmits for SYNACK RTT Previously, when using TCP Fast Open a server would return from tcp_check_req() before updating snt_synack based on TCP timestamp echo replies and whether or not we've retransmitted the SYNACK. The result was that (a) for TFO connections using timestamps we used an incorrect baseline SYNACK send time (tcp_time_stamp of SYNACK send instead of rcv_tsecr), and (b) for TFO connections that do not have TCP timestamps but retransmit the SYNACK we took a SYNACK RTT sample when we should not take a sample. This fix merely moves the snt_synack update logic a bit earlier in the function, so that connections using TCP Fast Open will properly do these updates when the ACK for the SYNACK arrives. Moving this snt_synack update logic means that with TCP_DEFER_ACCEPT enabled we do a few instructions of wasted work on each bare ACK, but that seems OK. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5792577b5bc5..27536ba16c9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -692,6 +692,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; + /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ + if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) + tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; + else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + tcp_rsk(req)->snt_synack = 0; + /* For Fast Open no more processing is needed (sk is the * child socket). */ @@ -705,10 +711,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } - if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) - tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; - else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ - tcp_rsk(req)->snt_synack = 0; /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all -- cgit v1.2.1 From e69bebde46b5ddfb7d8f17cae66031e282ebc464 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:57 +0000 Subject: tcp: TCP Fast Open Server - call tcp_validate_incoming() for all packets A TCP Fast Open (TFO) passive connection must call both tcp_check_req() and tcp_validate_incoming() for all incoming ACKs that are attempting to complete the 3WHS. This is needed to parallel all the action that happens for a non-TFO connection, where for an ACK that is attempting to complete the 3WHS we call both tcp_check_req() and tcp_validate_incoming(). For example, upon receiving the ACK that completes the 3WHS, we need to call tcp_fast_parse_options() and update ts_recent based on the incoming timestamp value in the ACK. One symptom of the problem with the previous code was that for passive TFO connections using TCP timestamps, the outgoing TS ecr values ignored the incoming TS val value on the ACK that completed the 3WHS. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bb326684495b..36e069a1f47b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5969,7 +5969,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tcp_check_req(sk, skb, req, NULL, true) == NULL) goto discard; - } else if (!tcp_validate_incoming(sk, skb, th, 0)) + } + if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; /* step 5: check the ACK field */ -- cgit v1.2.1 From bd9087e0407bfb5ec22d82d3bab1d2bba45daf5a Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Sep 2012 21:22:23 +0200 Subject: netfilter: ipset: Add /0 network support to hash:net,iface type Now it is possible to setup a single hash:net,iface type of set and a single ip6?tables match which covers all egress/ingress filtering. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_netiface.c | 44 ++++++++++++++---------------- 1 file changed, 21 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 632693f0beeb..e7c671dd3cce 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -140,7 +140,7 @@ struct hash_netiface4_elem_hashed { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; }; #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) @@ -151,7 +151,7 @@ struct hash_netiface4_elem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; }; @@ -161,7 +161,7 @@ struct hash_netiface4_telem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; unsigned long timeout; }; @@ -181,18 +181,14 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, static inline bool hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) { - return elem->cidr == 0; + return elem->elem == 0; } static inline void hash_netiface4_data_copy(struct hash_netiface4_elem *dst, const struct hash_netiface4_elem *src) { - dst->ip = src->ip; - dst->cidr = src->cidr; - dst->physdev = src->physdev; - dst->iface = src->iface; - dst->nomatch = src->nomatch; + memcpy(dst, src, sizeof(*dst)); } static inline void @@ -217,7 +213,7 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) static inline void hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) { - elem->cidr = 0; + elem->elem = 0; } static bool @@ -288,7 +284,8 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem data = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .elem = 1, }; int ret; @@ -339,7 +336,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], { struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface4_elem data = { .cidr = HOST_MASK }; + struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 }; u32 ip = 0, ip_to, last; u32 timeout = h->timeout; char iface[IFNAMSIZ]; @@ -360,7 +357,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + if (data.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -389,7 +386,6 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) flags |= (cadt_flags << 16); } - if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { data.ip = htonl(ip & ip_set_hostmask(data.cidr)); ret = adtfn(set, &data, timeout, flags); @@ -442,7 +438,7 @@ struct hash_netiface6_elem_hashed { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; }; #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) @@ -452,7 +448,7 @@ struct hash_netiface6_elem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; }; @@ -461,7 +457,7 @@ struct hash_netiface6_telem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; unsigned long timeout; }; @@ -481,7 +477,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, static inline bool hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) { - return elem->cidr == 0; + return elem->elem == 0; } static inline void @@ -506,7 +502,7 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { - elem->cidr = 0; + elem->elem = 0; } static inline void @@ -590,7 +586,8 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem data = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .elem = 1, }; int ret; @@ -637,7 +634,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], { struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface6_elem data = { .cidr = HOST_MASK }; + struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 }; u32 timeout = h->timeout; char iface[IFNAMSIZ]; int ret; @@ -659,7 +656,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + if (data.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip6_netmask(&data.ip, data.cidr); @@ -777,7 +774,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = 0, - .revision_max = 1, /* nomatch flag support added */ + /* = 1, nomatch flag support added */ + .revision_max = 2, /* /0 support added */ .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, -- cgit v1.2.1 From 10111a6ef373c377e87730749a0f68210c3fd062 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 21 Sep 2012 21:59:32 +0200 Subject: netfilter: ipset: Include supported revisions in module description Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 9 ++++++--- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 9 ++++++--- net/netfilter/ipset/ip_set_bitmap_port.c | 9 ++++++--- net/netfilter/ipset/ip_set_hash_ip.c | 9 ++++++--- net/netfilter/ipset/ip_set_hash_ipport.c | 9 ++++++--- net/netfilter/ipset/ip_set_hash_ipportip.c | 9 ++++++--- net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 ++++++++----- net/netfilter/ipset/ip_set_hash_net.c | 11 +++++++---- net/netfilter/ipset/ip_set_hash_netiface.c | 11 +++++++---- net/netfilter/ipset/ip_set_hash_netport.c | 13 ++++++++----- net/netfilter/ipset/ip_set_list_set.c | 9 ++++++--- 11 files changed, 72 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 02184b5ef9e1..4a92fd47bd4c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -27,9 +27,12 @@ #define IP_SET_BITMAP_TIMEOUT #include +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("bitmap:ip type of IP sets"); +IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ @@ -556,8 +559,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 6819d3cff919..645c9d13c194 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -26,9 +26,12 @@ #include #include +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets"); +IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); enum { @@ -630,8 +633,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b9f1fce7053b..e6b2db76f4c3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -22,9 +22,12 @@ #define IP_SET_BITMAP_TIMEOUT #include +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("bitmap:port type of IP sets"); +IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ @@ -487,8 +490,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index bc8f76edc260..ec3dba5dcd62 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -24,9 +24,12 @@ #include #include +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:ip type of IP sets"); +IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ @@ -452,8 +455,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6760fd4d7858..0171f7502fa5 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -25,9 +25,12 @@ #include #include +#define REVISION_MIN 0 +#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:ip,port type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ @@ -523,8 +526,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 1, /* SCTP and UDPLITE support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index ac09bec274f1..6344ef551ec8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -25,9 +25,12 @@ #include #include +#define REVISION_MIN 0 +#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ @@ -541,8 +544,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 1, /* SCTP and UDPLITE support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 242814e4db4e..8ee916875a23 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -25,9 +25,14 @@ #include #include +#define REVISION_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +#define REVISION_MAX 3 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:ip,port,net type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -695,10 +700,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* 1 SCTP and UDPLITE support added */ - /* 2 Range as input support for IPv4 added */ - .revision_max = 3, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d676093822b1..014ff7272f7b 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -23,9 +23,13 @@ #include #include +#define REVISION_MIN 0 +/* 1 Range as input support for IPv4 added */ +#define REVISION_MAX 2 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:net type of IP sets"); +IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ @@ -535,9 +539,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* = 1 Range as input support for IPv4 added */ - .revision_max = 2, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index e7c671dd3cce..a5c8491d265e 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -24,9 +24,13 @@ #include #include +#define REVISION_MIN 0 +/* 1 nomatch flag support added */ +#define REVISION_MAX 2 /* /0 support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:net,iface type of IP sets"); +IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -773,9 +777,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* = 1, nomatch flag support added */ - .revision_max = 2, /* /0 support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 3ec27fccddd7..7ca357a62b1c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -24,9 +24,14 @@ #include #include +#define REVISION_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +#define REVISION_MAX 3 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("hash:net,port type of IP sets"); +IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -648,10 +653,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* 1 SCTP and UDPLITE support added */ - /* 2, Range as input support for IPv4 added */ - .revision_max = 3, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6cb1225765f9..8371c2bac2e4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -16,9 +16,12 @@ #include #include +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("list:set type of IP sets"); +IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements without and with timeout */ @@ -579,8 +582,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, -- cgit v1.2.1 From 3ace95c0ac125a042cfb682d0a9bbdbf1e5a2c65 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 21 Sep 2012 22:01:45 +0200 Subject: netfilter: ipset: Coding style fixes Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++-- net/netfilter/ipset/ip_set_core.c | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 645c9d13c194..0f92dc24cb89 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -323,11 +323,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, (elem->match == MAC_FILLED && nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether))) - goto nla_put_failure; + goto nla_put_failure; timeout = elem->match == MAC_UNSET ? elem->timeout : ip_set_timeout_get(elem->timeout); if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout))) - goto nla_put_failure; + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index ad39ef406851..72e9bf0ef90d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -69,7 +69,8 @@ find_set_type(const char *name, u8 family, u8 revision) list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && - (type->family == family || type->family == NFPROTO_UNSPEC) && + (type->family == family || + type->family == NFPROTO_UNSPEC) && revision >= type->revision_min && revision <= type->revision_max) return type; @@ -149,7 +150,8 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && - (type->family == family || type->family == NFPROTO_UNSPEC)) { + (type->family == family || + type->family == NFPROTO_UNSPEC)) { found = true; if (type->revision_min < *min) *min = type->revision_min; @@ -721,7 +723,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - if ((ret = find_free_id(set->name, &index, &clash)) != 0) { + ret = find_free_id(set->name, &index, &clash); + if (ret != 0) { /* If this is the same set and requested, ignore error */ if (ret == -EEXIST && (flags & IPSET_FLAG_EXIST) && -- cgit v1.2.1 From 3e0304a583d72c747caa8afac76b8d514aa293f5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 21 Sep 2012 22:02:36 +0200 Subject: netfilter: ipset: Support to match elements marked with "nomatch" Exceptions can now be matched and we can branch according to the possible cases: a. match in the set if the element is not flagged as "nomatch" b. match in the set if the element is flagged with "nomatch" c. no match i.e. iptables ... -m set --match-set ... -j ... iptables ... -m set --match-set ... --nomatch-entries -j ... ... Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 6 ++++++ net/netfilter/ipset/ip_set_hash_ipportnet.c | 11 ++++++----- net/netfilter/ipset/ip_set_hash_net.c | 10 +++++----- net/netfilter/ipset/ip_set_hash_netiface.c | 11 ++++++----- net/netfilter/ipset/ip_set_hash_netport.c | 10 +++++----- net/netfilter/xt_set.c | 22 ++++++++++++++++++++++ 6 files changed, 50 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 72e9bf0ef90d..778465f217fa 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -370,6 +370,12 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, set->variant->kadt(set, skb, par, IPSET_ADD, opt); write_unlock_bh(&set->lock); ret = 1; + } else { + /* --return-nomatch: invert matched element */ + if ((opt->flags & IPSET_RETURN_NOMATCH) && + (set->type->features & IPSET_TYPE_NOMATCH) && + (ret > 0 || ret == -ENOTEMPTY)) + ret = -ret; } /* Convert error codes to nomatch */ diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8ee916875a23..cb71f9a774e7 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,10 +104,10 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -411,10 +411,10 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -697,7 +697,8 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_ipportnet_type __read_mostly = { .name = "hash:ip,port,net", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, .revision_min = REVISION_MIN, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 014ff7272f7b..29e94b981f3f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -90,10 +90,10 @@ hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_net4_data_match(const struct hash_net4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -311,10 +311,10 @@ hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_net6_data_match(const struct hash_net6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -536,7 +536,7 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_net_type __read_mostly = { .name = "hash:net", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP, + .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = REVISION_MIN, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index a5c8491d265e..b9a63381e349 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -201,10 +201,10 @@ hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_netiface4_data_match(const struct hash_netiface4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -497,10 +497,10 @@ hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_netiface6_data_match(const struct hash_netiface6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -774,7 +774,8 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_netiface_type __read_mostly = { .name = "hash:net,iface", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE, + .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE | + IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = REVISION_MIN, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7ca357a62b1c..7ef700de596c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,10 +104,10 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -375,10 +375,10 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -650,7 +650,7 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_netport_type __read_mostly = { .name = "hash:net,port", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = REVISION_MIN, diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index c6f7db720d84..865a9e54f3ad 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -356,6 +356,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v1_destroy, .me = THIS_MODULE }, + /* --return-nomatch flag support */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 2, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 2, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, + .me = THIS_MODULE + }, }; static struct xt_target set_targets[] __read_mostly = { @@ -389,6 +410,7 @@ static struct xt_target set_targets[] __read_mostly = { .destroy = set_target_v1_destroy, .me = THIS_MODULE }, + /* --timeout and --exist flags support */ { .name = "SET", .revision = 2, -- cgit v1.2.1 From 30099b2e9b3a1f88250d31472edf2b6d6f0a7d8b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 17:03:47 +0000 Subject: tcp: TCP Fast Open Server - record retransmits after 3WHS When recording the number of SYNACK retransmits for servers using TCP Fast Open, fix the code to ensure that we copy over the retransmit count from the request_sock after we receive the ACK that completes the 3-way handshake. The story here is similar to that of SYNACK RTT measurements. Previously we were always doing this in tcp_v4_syn_recv_sock(). However, for TCP Fast Open connections tcp_v4_conn_req_fastopen() calls tcp_v4_syn_recv_sock() at the time we receive the SYN. So for TFO we must copy the final SYNACK retransmit count in tcp_rcv_state_process(). Note that copying over the SYNACK retransmit count will give us the correct count since, as is mentioned in a comment in tcp_retransmit_timer(), before we receive an ACK for our SYN-ACK a TFO passive connection does not retransmit anything else (e.g., data or FIN segments). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 36e069a1f47b..e037697c3b77 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5985,6 +5985,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (req) { tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->retrans; + reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for -- cgit v1.2.1 From 40a3eb33e307616567f4b81792f405a7f3f0abee Mon Sep 17 00:00:00 2001 From: Def Date: Thu, 20 Sep 2012 14:56:13 +0200 Subject: batman-adv: Fix change mac address of soft iface. Into function interface_set_mac_addr, the function tt_local_add was invoked before updating dev->dev_addr. The new MAC address was not tagged as NoPurge. Signed-off-by: Def --- net/batman-adv/soft-interface.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2aae96c..21c53577c8d6 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -100,18 +100,21 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct sockaddr *addr = p; + uint8_t old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + memcpy(old_addr, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, dev->dev_addr, + batadv_tt_local_remove(bat_priv, old_addr, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } -- cgit v1.2.1 From 7caf69fb9c5017df01945a1861c042f6aa08edeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 18 Sep 2012 03:01:08 +0200 Subject: batman-adv: Fix symmetry check / route flapping in multi interface setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If receiving an OGM from a neighbor other than the currently selected and if it has the same TQ then we are supposed to switch if this neighbor provides a more symmetric link than the currently selected one. However this symmetry check currently is broken if the interface of the neighbor we received the OGM from and the one of the currently selected neighbor differ: We are currently trying to determine the symmetry of the link towards the selected router via the link we received the OGM from instead of just checking via the link towards the currently selected router. This leads to way more route switches than necessary and can lead to permanent route flapping in many common multi interface setups. This patch fixes this issue by using the right interface for this symmetry check. Signed-off-by: Linus Lüssing --- net/batman-adv/bat_iv_ogm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8bdd1e..469daabd90c7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -642,7 +642,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + int if_num; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -727,17 +728,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = router->if_incoming->if_num; + sum_orig = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = neigh_node->if_incoming->if_num; + sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } -- cgit v1.2.1 From eab48345c2b2d791159aaac4a77000baa8dbc1ae Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 6 Sep 2012 16:06:52 +0200 Subject: rfkill: prevent unnecessary event generation Prevent unnecessary rfkill event generation when the state has not actually changed. These events have to be delivered to relevant userspace processes, causing these processes to wake up and do something while they could as well have slept. This obviously results in more CPU usage, longer time-to-sleep-again and therefore higher power consumption. Signed-off-by: Vitaly Wool Signed-off-by: Mykyta Iziumtsev Signed-off-by: Johannes Berg --- net/rfkill/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index c275bad12068..a5c952741279 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -270,6 +270,7 @@ static bool __rfkill_set_hw_state(struct rfkill *rfkill, static void rfkill_set_block(struct rfkill *rfkill, bool blocked) { unsigned long flags; + bool prev, curr; int err; if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) @@ -284,6 +285,8 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->ops->query(rfkill, rfkill->data); spin_lock_irqsave(&rfkill->lock, flags); + prev = rfkill->state & RFKILL_BLOCK_SW; + if (rfkill->state & RFKILL_BLOCK_SW) rfkill->state |= RFKILL_BLOCK_SW_PREV; else @@ -313,10 +316,13 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) } rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL; rfkill->state &= ~RFKILL_BLOCK_SW_PREV; + curr = rfkill->state & RFKILL_BLOCK_SW; spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); - rfkill_event(rfkill); + + if (prev != curr) + rfkill_event(rfkill); } #ifdef CONFIG_RFKILL_INPUT -- cgit v1.2.1 From 54eb3df3a7d01b6cd395bdc1098280f2f93fbec5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Sep 2012 00:23:09 +0000 Subject: netfilter: xt_time: add support to ignore day transition Currently, if you want to do something like: "match Monday, starting 23:00, for two hours" You need two rules, one for Mon 23:00 to 0:00 and one for Tue 0:00-1:00. The rule: --weekdays Mo --timestart 23:00 --timestop 01:00 looks correct, but it will first match on monday from midnight to 1 a.m. and then again for another hour from 23:00 onwards. This permits userspace to explicitly ignore the day transition and match for a single, continuous time period instead. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_time.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c48975ff8ea2..0ae55a36f492 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -42,6 +42,7 @@ static const u_int16_t days_since_leapyear[] = { */ enum { DSE_FIRST = 2039, + SECONDS_PER_DAY = 86400, }; static const u_int16_t days_since_epoch[] = { /* 2039 - 2030 */ @@ -78,7 +79,7 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time) unsigned int v, w; /* Each day has 86400s, so finding the hour/minute is actually easy. */ - v = time % 86400; + v = time % SECONDS_PER_DAY; r->second = v % 60; w = v / 60; r->minute = w % 60; @@ -199,6 +200,18 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) if (packet_time < info->daytime_start && packet_time > info->daytime_stop) return false; + + /** if user asked to ignore 'next day', then e.g. + * '1 PM Wed, August 1st' should be treated + * like 'Tue 1 PM July 31st'. + * + * This also causes + * 'Monday, "23:00 to 01:00", to match for 2 hours, starting + * Monday 23:00 to Tuesday 01:00. + */ + if ((info->flags & XT_TIME_CONTIGUOUS) && + packet_time <= info->daytime_stop) + stamp -= SECONDS_PER_DAY; } localtime_2(¤t_time, stamp); @@ -227,6 +240,15 @@ static int time_mt_check(const struct xt_mtchk_param *par) return -EDOM; } + if (info->flags & ~XT_TIME_ALL_FLAGS) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); + return -EINVAL; + } + + if ((info->flags & XT_TIME_CONTIGUOUS) && + info->daytime_start < info->daytime_stop) + return -EINVAL; + return 0; } -- cgit v1.2.1 From 7be54ca4764bdead40bee7b645a72718c20ff2c8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 21 Sep 2012 16:52:08 +0200 Subject: netfilter: nf_ct_ftp: add sequence tracking pickup facility for injected entries This patch allows the FTP helper to pickup the sequence tracking from the first packet seen. This is useful to fix the breakage of the first FTP command after the failover while using conntrackd to synchronize states. The seq_aft_nl_num field in struct nf_ct_ftp_info has been shrinked to 16-bits (enough for what it does), so we can use the remaining 16-bits to store the flags while using the same size for the private FTP helper data. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 21 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 4 ++-- net/netfilter/nfnetlink_cthelper.c | 3 +++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index f8cc26ad4456..1ce3befb7c8a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -396,6 +396,12 @@ static int help(struct sk_buff *skb, /* Look up to see if we're just after a \n. */ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { + /* We're picking up this, clear flags and let it continue */ + if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) { + ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP; + goto skip_nl_seq; + } + /* Now if this ends in \n, update ftp info. */ pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n", ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", @@ -406,6 +412,7 @@ static int help(struct sk_buff *skb, goto out_update_nl; } +skip_nl_seq: /* Initialize IP/IPv6 addr to expected address (it's not mentioned in EPSV responses) */ cmd.l3num = nf_ct_l3num(ct); @@ -512,6 +519,19 @@ out_update_nl: return ret; } +static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct) +{ + struct nf_ct_ftp_master *ftp = nfct_help_data(ct); + + /* This conntrack has been injected from user-space, always pick up + * sequence tracking. Otherwise, the first FTP command after the + * failover breaks. + */ + ftp->flags[IP_CT_DIR_ORIGINAL] |= NF_CT_FTP_SEQ_PICKUP; + ftp->flags[IP_CT_DIR_REPLY] |= NF_CT_FTP_SEQ_PICKUP; + return 0; +} + static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { @@ -561,6 +581,7 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][j].expect_policy = &ftp_exp_policy; ftp[i][j].me = THIS_MODULE; ftp[i][j].help = help; + ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr; if (ports[i] == FTP_PORT) sprintf(ftp[i][j].name, "ftp"); else diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2dcd080b8c4f..7bbfb3deea30 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1238,7 +1238,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help) { if (help->helper == helper) { /* update private helper data if allowed. */ - if (helper->from_nlattr && helpinfo) + if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); return 0; } else @@ -1467,7 +1467,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } /* set private helper data if allowed. */ - if (helper->from_nlattr && helpinfo) + if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); /* not in hash table yet so not strictly necessary */ diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3678073360a3..945950a8b1f1 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -85,6 +85,9 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { const struct nf_conn_help *help = nfct_help(ct); + if (attr == NULL) + return -EINVAL; + if (help->helper->data_len == 0) return -EINVAL; -- cgit v1.2.1 From ba8d3b0bf5900b9ee5354e7d73358867763a6766 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 6 Sep 2012 17:09:26 +0200 Subject: netfilter: nfnetlink_queue: fix maximum packet length to userspace The packets that we send via NFQUEUE are encapsulated in the NFQA_PAYLOAD attribute. The length of the packet in userspace is obtained via attr->nla_len field. This field contains the size of the Netlink attribute header plus the packet length. If the maximum packet length is specified, ie. 65535 bytes, and packets in the range of (65531,65535] are sent to userspace, the attr->nla_len overflows and it reports bogus lengths to the application. To fix this, this patch limits the maximum packet length to 65531 bytes. If larger packet length is specified, the packet that we send to user-space is truncated to 65531 bytes. To support 65535 bytes packets, we have to revisit the idea of the 32-bits Netlink attribute length. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 43de3a03ee76..3e4ddcb7e781 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -526,9 +526,13 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* we're using struct nlattr which has 16bit nla_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; + /* We're using struct nlattr which has 16bit nla_len. Note that + * nla_len includes the header length. Thus, the maximum packet + * length that we support is 65531 bytes. We send truncated + * packets if the specified length is larger than that. + */ + if (range > 0xffff - NLA_HDRLEN) + queue->copy_range = 0xffff - NLA_HDRLEN; else queue->copy_range = range; break; -- cgit v1.2.1 From 6ee584be3ee30f72dec8a8ca87bc10824e27a631 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 24 Sep 2012 14:52:12 +0200 Subject: netfilter: nfnetlink_queue: add NFQA_CAP_LEN attribute This patch adds the NFQA_CAP_LEN attribute that allows us to know what is the real packet size from user-space (even if we decided to retrieve just a few bytes from the packet instead of all of it). Security software that inspects packets should always check for this new attribute to make sure that it is inspecting the entire packet. This also helps to provide a workaround for the problem described in: http://marc.info/?l=netfilter-devel&m=134519473212536&w=2 Original idea from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3e4ddcb7e781..e12d44e75b21 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -225,7 +225,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, { sk_buff_data_t old_tail; size_t size; - size_t data_len = 0; + size_t data_len = 0, cap_len = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -247,7 +247,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) + + nla_total_size(sizeof(u_int32_t))); /* cap_len */ outdev = entry->outdev; @@ -266,6 +267,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, data_len = entskb->len; size += nla_total_size(data_len); + cap_len = entskb->len; break; } @@ -402,6 +404,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; + if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + goto nla_put_failure; + nlh->nlmsg_len = skb->tail - old_tail; return skb; -- cgit v1.2.1 From 5640f7685831e088fe6c2e1f863a6805962f8e81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Sep 2012 23:04:42 +0000 Subject: net: use a per task frag allocator We currently use a per socket order-0 page cache for tcp_sendmsg() operations. This page is used to build fragments for skbs. Its done to increase probability of coalescing small write() into single segments in skbs still in write queue (not yet sent) But it wastes a lot of memory for applications handling many mostly idle sockets, since each socket holds one page in sk->sk_sndmsg_page Its also quite inefficient to build TSO 64KB packets, because we need about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit page allocator more than wanted. This patch adds a per task frag allocator and uses bigger pages, if available. An automatic fallback is done in case of memory pressure. (up to 32768 bytes per frag, thats order-3 pages on x86) This increases TCP stream performance by 20% on loopback device, but also benefits on other network devices, since 8x less frags are mapped on transmit and unmapped on tx completion. Alexander Duyck mentioned a probable performance win on systems with IOMMU enabled. Its possible some SG enabled hardware cant cope with bigger fragments, but their ndo_start_xmit() should already handle this, splitting a fragment in sub fragments, since some arches have PAGE_SIZE=65536 Successfully tested on various ethernet devices. (ixgbe, igb, bnx2x, tg3, mellanox mlx4) Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Vijay Subramanian Cc: Alexander Duyck Tested-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/core/skbuff.c | 37 ++++++------------------ net/core/sock.c | 49 ++++++++++++++++++++++++++++++-- net/ipv4/ip_output.c | 70 ++++++++++++++++++--------------------------- net/ipv4/raw.c | 19 ++++++++----- net/ipv4/tcp.c | 79 ++++++++++++++------------------------------------- net/ipv4/tcp_ipv4.c | 8 ------ net/ipv6/ip6_output.c | 65 ++++++++++++++++-------------------------- net/sched/em_meta.c | 2 +- 8 files changed, 144 insertions(+), 185 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe00d1208167..2ede3cfa8ffa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1655,38 +1655,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sk_buff *skb, struct sock *sk) { - struct page *p = sk->sk_sndmsg_page; - unsigned int off; + struct page_frag *pfrag = sk_page_frag(sk); - if (!p) { -new_page: - p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0); - if (!p) - return NULL; - - off = sk->sk_sndmsg_off = 0; - /* hold one ref to this page until it's full */ - } else { - unsigned int mlen; - - /* If we are the only user of the page, we can reset offset */ - if (page_count(p) == 1) - sk->sk_sndmsg_off = 0; - off = sk->sk_sndmsg_off; - mlen = PAGE_SIZE - off; - if (mlen < 64 && mlen < *len) { - put_page(p); - goto new_page; - } + if (!sk_page_frag_refill(sk, pfrag)) + return NULL; - *len = min_t(unsigned int, *len, mlen); - } + *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); - memcpy(page_address(p) + off, page_address(page) + *offset, *len); - sk->sk_sndmsg_off += *len; - *offset = off; + memcpy(page_address(pfrag->page) + pfrag->offset, + page_address(page) + *offset, *len); + *offset = pfrag->offset; + pfrag->offset += *len; - return p; + return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, diff --git a/net/core/sock.c b/net/core/sock.c index 2693f7649222..727114cd6f7e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1744,6 +1744,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +/* On 32bit arches, an skb frag is limited to 2^15 */ +#define SKB_FRAG_PAGE_ORDER get_order(32768) + +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + int order; + + if (pfrag->page) { + if (atomic_read(&pfrag->page->_count) == 1) { + pfrag->offset = 0; + return true; + } + if (pfrag->offset < pfrag->size) + return true; + put_page(pfrag->page); + } + + /* We restrict high order allocations to users that can afford to wait */ + order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; + + do { + gfp_t gfp = sk->sk_allocation; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + pfrag->page = alloc_pages(gfp, order); + if (likely(pfrag->page)) { + pfrag->offset = 0; + pfrag->size = PAGE_SIZE << order; + return true; + } + } while (--order >= 0); + + sk_enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return false; +} +EXPORT_SYMBOL(sk_page_frag_refill); + static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) @@ -2173,8 +2212,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; @@ -2417,6 +2456,12 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + sock_put(sk); } EXPORT_SYMBOL(sk_common_release); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a5beab1dc958..24a29a39e9a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -793,6 +793,7 @@ static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork, + struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -987,47 +988,30 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = cork->page; - int off = cork->off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if (i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - cork->page = page; - cork->off = 0; - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; - goto error; - } - if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } - cork->off += copy; - skb_frag_size_add(frag, copy); + copy = min_t(int, copy, pfrag->size - pfrag->offset); + if (getfrag(from, + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1039,6 +1023,8 @@ alloc_new_skb: return 0; +error_efault: + err = -EFAULT; error: cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); @@ -1079,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; - cork->page = NULL; - cork->off = 0; return 0; } @@ -1117,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, transhdrlen = 0; } - return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, + return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, + sk_page_frag(sk), getfrag, from, length, transhdrlen, flags); } @@ -1439,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, + err = __ip_append_data(sk, fl4, &queue, &cork, + ¤t->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { __ip_flush_pending_frames(sk, &queue, &cork); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f2425785d40a..a80740ba4248 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,23 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; - - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + struct icmphdr _hdr; + const struct icmphdr *hdr; + + pr_err("icmp_filter skb_transport_offset %d data-head %ld len %d/%d\n", + skb_transport_offset(skb), skb->data - skb->head, skb->len, skb->data_len); + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + pr_err("head %p data %p hdr %p type %d\n", skb->head, skb->data, hdr, hdr ? hdr->type : -1); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7b1e940393cf..72ea4752f21b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1150,78 +1150,43 @@ new_segment: if (err) goto do_fault; } else { - bool merge = false; + bool merge = true; int i = skb_shinfo(skb)->nr_frags; - struct page *page = sk->sk_sndmsg_page; - int off; - - if (page && page_count(page) == 1) - sk->sk_sndmsg_off = 0; - - off = sk->sk_sndmsg_off; - - if (skb_can_coalesce(skb, i, page, off) && - off != PAGE_SIZE) { - /* We can extend the last page - * fragment. */ - merge = true; - } else if (i == MAX_SKB_FRAGS || !sg) { - /* Need to add new fragment and cannot - * do this because interface is non-SG, - * or because all the page slots are - * busy. */ - tcp_mark_push(tp, skb); - goto new_segment; - } else if (page) { - if (off == PAGE_SIZE) { - put_page(page); - sk->sk_sndmsg_page = page = NULL; - off = 0; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - } else - off = 0; + merge = false; + } - if (copy > PAGE_SIZE - off) - copy = PAGE_SIZE - off; + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; - if (!page) { - /* Allocate new cache page. */ - if (!(page = sk_stream_alloc_page(sk))) - goto wait_for_memory; - } - - /* Time to copy data. We are close to - * the end! */ err = skb_copy_to_page_nocache(sk, from, skb, - page, off, copy); - if (err) { - /* If this page was new, give it to the - * socket so it does not get leaked. - */ - if (!sk->sk_sndmsg_page) { - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; - } + pfrag->page, + pfrag->offset, + copy); + if (err) goto do_error; - } /* Update the skb. */ if (merge) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { - skb_fill_page_desc(skb, i, page, off, copy); - if (sk->sk_sndmsg_page) { - get_page(page); - } else if (off + copy < PAGE_SIZE) { - get_page(page); - sk->sk_sndmsg_page = page; - } + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } - - sk->sk_sndmsg_off = off + copy; + pfrag->offset += copy; } if (!copied) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0a7e020f16b5..93406c583f43 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2200,14 +2200,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* - * If sendmsg cached page exists, toss it. - */ - if (sk->sk_sndmsg_page) { - __free_page(sk->sk_sndmsg_page); - sk->sk_sndmsg_page = NULL; - } - /* TCP Cookie Transactions */ if (tp->cookie_values != NULL) { kref_put(&tp->cookie_values->kref, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3dd4a37488d5..aece3e792f84 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1279,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1504,48 +1502,31 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if(i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + struct page_frag *pfrag = sk_page_frag(sk); - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (getfrag(from, - skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; - goto error; - } - sk->sk_sndmsg_off += copy; - skb_frag_size_add(frag, copy); + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1554,7 +1535,11 @@ alloc_new_skb: offset += copy; length -= copy; } + return 0; + +error_efault: + err = -EFAULT; error: cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4ab6e3325573..7c3de6ffa516 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -461,7 +461,7 @@ META_COLLECTOR(int_sk_sndtimeo) META_COLLECTOR(int_sk_sendmsg_off) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_sndmsg_off; + dst->value = skb->sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) -- cgit v1.2.1 From 9e49e88958feb41ec701fa34b44723dabadbc28c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 24 Sep 2012 02:23:59 +0000 Subject: filter: add XOR instruction for use with X/K SKF_AD_ALU_XOR_X has been added a while ago, but as an 'ancillary' operation that is invoked through a negative offset in K within BPF load operations. Since BPF_MOD has recently been added, BPF_XOR should also be part of the common ALU operations. Removing SKF_AD_ALU_XOR_X might not be an option since this is exposed to user space. Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/filter.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index fbe3a8d12570..3d92ebb7fbcf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -187,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb, case BPF_S_ALU_OR_K: A |= K; continue; + case BPF_S_ANC_ALU_XOR_X: + case BPF_S_ALU_XOR_X: + A ^= X; + continue; + case BPF_S_ALU_XOR_K: + A ^= K; + continue; case BPF_S_ALU_LSH_X: A <<= X; continue; @@ -334,9 +341,6 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; - case BPF_S_ANC_ALU_XOR_X: - A ^= X; - continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -483,6 +487,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, + [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, + [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, -- cgit v1.2.1 From 3e10986d1d698140747fcfc2761ec9cb64c1d582 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Sep 2012 07:00:11 +0000 Subject: net: guard tcp_set_keepalive() to tcp sockets Its possible to use RAW sockets to get a crash in tcp_set_keepalive() / sk_reset_timer() Fix is to make sure socket is a SOCK_STREAM one. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 305792076121..a6000fbad294 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -691,7 +691,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); -- cgit v1.2.1 From 7e0352306f6869b442a574a8e691f126c9fe930a Mon Sep 17 00:00:00 2001 From: Ilan Elias Date: Wed, 15 Aug 2012 11:46:22 +0300 Subject: NFC: Set local general bytes in nci_start_poll If initiator protocol is NFC-DEP, set the local general bytes in nci_start_poll. Signed-off-by: Ilan Elias Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nci/rsp.c | 14 ++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f81efe13985a..f017b781667a 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -176,6 +176,27 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) (1 + ((*num) * sizeof(struct disc_map_config))), &cmd); } +struct nci_set_config_param { + __u8 id; + size_t len; + __u8 *val; +}; + +static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_set_config_param *param = (struct nci_set_config_param *)opt; + struct nci_core_set_config_cmd cmd; + + BUG_ON(param->len > NCI_MAX_PARAM_LEN); + + cmd.num_params = 1; + cmd.param.id = param->id; + cmd.param.len = param->len; + memcpy(cmd.param.val, param->val, param->len); + + nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd); +} + static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) { struct nci_rf_disc_cmd cmd; @@ -388,6 +409,32 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } +static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + struct nci_set_config_param param; + __u8 local_gb[NFC_MAX_GT_LEN]; + int i, rc = 0; + + param.val = nfc_get_local_general_bytes(nfc_dev, ¶m.len); + if ((param.val == NULL) || (param.len == 0)) + return rc; + + if (param.len > NCI_MAX_PARAM_LEN) + return -EINVAL; + + for (i = 0; i < param.len; i++) + local_gb[param.len-1-i] = param.val[i]; + + param.id = NCI_PN_ATR_REQ_GEN_BYTES; + param.val = local_gb; + + rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); + + return rc; +} + static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { @@ -415,6 +462,14 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, return -EBUSY; } + if (im_protocols & NFC_PROTO_NFC_DEP_MASK) { + rc = nci_set_local_general_bytes(nfc_dev); + if (rc) { + pr_err("failed to set local general bytes\n"); + return rc; + } + } + rc = nci_request(ndev, nci_rf_discover_req, im_protocols, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 3003c3390e49..dd072f38ad00 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -119,6 +119,16 @@ exit: nci_req_complete(ndev, rsp_1->status); } +static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_core_set_config_rsp *rsp = (void *) skb->data; + + pr_debug("status 0x%x\n", rsp->status); + + nci_req_complete(ndev, rsp->status); +} + static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { @@ -194,6 +204,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_init_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_SET_CONFIG_RSP: + nci_core_set_config_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; -- cgit v1.2.1 From ac206838403411e617dbe0e7df1891ee957f1f9a Mon Sep 17 00:00:00 2001 From: Ilan Elias Date: Wed, 15 Aug 2012 11:46:23 +0300 Subject: NFC: Parse NCI NFC-DEP activation params Signed-off-by: Ilan Elias Signed-off-by: Samuel Ortiz --- net/nfc/nci/ntf.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'net') diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index af7a93b04393..6e17661a41a4 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -361,6 +361,33 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, return NCI_STATUS_OK; } +static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf, __u8 *data) +{ + struct activation_params_poll_nfc_dep *poll; + int i; + + switch (ntf->activation_rf_tech_and_mode) { + case NCI_NFC_A_PASSIVE_POLL_MODE: + case NCI_NFC_F_PASSIVE_POLL_MODE: + poll = &ntf->activation_params.poll_nfc_dep; + poll->atr_res_len = min_t(__u8, *data++, 63); + pr_debug("atr_res_len %d\n", poll->atr_res_len); + if (poll->atr_res_len > 0) { + for (i = 0; i < poll->atr_res_len; i++) + poll->atr_res[poll->atr_res_len-1-i] = data[i]; + } + break; + + default: + pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", + ntf->activation_rf_tech_and_mode); + return NCI_STATUS_RF_PROTOCOL_ERROR; + } + + return NCI_STATUS_OK; +} + static void nci_target_auto_activated(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf) { @@ -454,6 +481,11 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, &ntf, data); break; + case NCI_RF_INTERFACE_NFC_DEP: + err = nci_extract_activation_params_nfc_dep(ndev, + &ntf, data); + break; + case NCI_RF_INTERFACE_FRAME: /* no activation params */ break; -- cgit v1.2.1 From 767f19ae698e535f308663c48245fa951abebe20 Mon Sep 17 00:00:00 2001 From: Ilan Elias Date: Wed, 15 Aug 2012 11:46:24 +0300 Subject: NFC: Implement NCI dep_link_up and dep_link_down During NFC-DEP target activation, store the remote general bytes to be used later in dep_link_up. When dep_link_up is called, activate the NFC-DEP target, and forward the remote general bytes. When dep_link_down is called, deactivate the target. Signed-off-by: Ilan Elias Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 36 +++++++++++++++++++++++++++++++++++- net/nfc/nci/ntf.c | 20 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f017b781667a..acf9abb7d99b 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -564,7 +564,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - pr_debug("target_idx %d\n", target->idx); + pr_debug("entry\n"); if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); @@ -579,6 +579,38 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } } + +static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, + __u8 comm_mode, __u8 *gb, size_t gb_len) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + int rc; + + pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode); + + rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP); + if (rc) + return rc; + + rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb, + ndev->remote_gb_len); + if (!rc) + rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE, + NFC_RF_INITIATOR); + + return rc; +} + +static int nci_dep_link_down(struct nfc_dev *nfc_dev) +{ + pr_debug("entry\n"); + + nci_deactivate_target(nfc_dev, NULL); + + return 0; +} + + static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) @@ -612,6 +644,8 @@ static struct nfc_ops nci_nfc_ops = { .dev_down = nci_dev_down, .start_poll = nci_start_poll, .stop_poll = nci_stop_poll, + .dep_link_up = nci_dep_link_up, + .dep_link_down = nci_dep_link_down, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 6e17661a41a4..b2aa98ef0927 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -176,6 +176,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, protocol = NFC_PROTO_ISO14443_B_MASK; else if (rf_protocol == NCI_RF_PROTOCOL_T3T) protocol = NFC_PROTO_FELICA_MASK; + else if (rf_protocol == NCI_RF_PROTOCOL_NFC_DEP) + protocol = NFC_PROTO_NFC_DEP_MASK; else protocol = 0; @@ -505,6 +507,24 @@ exit: /* set the available credits to initial value */ atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + + /* store general bytes to be reported later in dep_link_up */ + if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { + ndev->remote_gb_len = 0; + + if (ntf.activation_params_len > 0) { + /* ATR_RES general bytes at offset 15 */ + ndev->remote_gb_len = min_t(__u8, + (ntf.activation_params + .poll_nfc_dep.atr_res_len + - NFC_ATR_RES_GT_OFFSET), + NFC_MAX_GT_LEN); + memcpy(ndev->remote_gb, + (ntf.activation_params.poll_nfc_dep + .atr_res + NFC_ATR_RES_GT_OFFSET), + ndev->remote_gb_len); + } + } } if (atomic_read(&ndev->state) == NCI_DISCOVERY) { -- cgit v1.2.1 From 5db327f96daa2401b9afec6cd80cebe6c6475bb1 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Thu, 16 Aug 2012 00:55:43 +0530 Subject: NFC: Remove repeated code for NULL check This patch remove the repeated code for checking llcp_sock & llcp_sock->dev against NULL. Signed-off-by: Syam Sidhardhan Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index ddeb9aa398f0..6e188d4020ba 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -300,9 +300,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx, llcp_sock->dsap, llcp_sock->ssap); - if (llcp_sock == NULL || llcp_sock->dev == NULL) - return -EBADFD; - uaddr->sa_family = AF_NFC; *len = sizeof(struct sockaddr_nfc_llcp); -- cgit v1.2.1 From 474fee3db16c63bc440bfb93b57f72ecfc4246f0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Aug 2012 16:22:16 -0700 Subject: NFC: Use system_nrt_wq instead of custom ones NFC is using a number of custom ordered workqueues w/ WQ_MEM_RECLAIM. WQ_MEM_RECLAIM is unnecessary unless NFC is gonna be used as transport for storage device, and all use cases match one work item to one ordered workqueue - IOW, there's no actual ordering going on at all and using system_nrt_wq gives the same behavior. There's nothing to be gained by using custom workqueues. Use system_nrt_wq instead and drop all the custom ones. Signed-off-by: Tejun Heo Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 13 ++---------- net/nfc/hci/core.c | 42 ++++++--------------------------------- net/nfc/hci/hcp.c | 2 +- net/nfc/hci/shdlc.c | 27 +++++++++---------------- net/nfc/llcp/llcp.c | 57 ++++++++--------------------------------------------- net/nfc/llcp/llcp.h | 3 --- 6 files changed, 26 insertions(+), 118 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index ff749794bc5b..c9eacc1f145f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -679,7 +679,7 @@ static void nfc_release(struct device *d) if (dev->ops->check_presence) { del_timer_sync(&dev->check_pres_timer); - destroy_workqueue(dev->check_pres_wq); + cancel_work_sync(&dev->check_pres_work); } nfc_genl_data_exit(&dev->genl_data); @@ -715,7 +715,7 @@ static void nfc_check_pres_timeout(unsigned long data) { struct nfc_dev *dev = (struct nfc_dev *)data; - queue_work(dev->check_pres_wq, &dev->check_pres_work); + queue_work(system_nrt_wq, &dev->check_pres_work); } struct class nfc_class = { @@ -784,20 +784,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - char name[32]; init_timer(&dev->check_pres_timer); dev->check_pres_timer.data = (unsigned long)dev; dev->check_pres_timer.function = nfc_check_pres_timeout; INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); - snprintf(name, sizeof(name), "nfc%d_check_pres_wq", dev->idx); - dev->check_pres_wq = alloc_workqueue(name, WQ_NON_REENTRANT | - WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (dev->check_pres_wq == NULL) { - kfree(dev); - return NULL; - } } return dev; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 1ac7b3fac6c9..03646beb3a73 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -141,7 +141,7 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err, kfree(hdev->cmd_pending_msg); hdev->cmd_pending_msg = NULL; - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + queue_work(system_nrt_wq, &hdev->msg_tx_work); } void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result, @@ -326,7 +326,7 @@ static void nfc_hci_cmd_timeout(unsigned long data) { struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + queue_work(system_nrt_wq, &hdev->msg_tx_work); } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, @@ -659,23 +659,11 @@ EXPORT_SYMBOL(nfc_hci_free_device); int nfc_hci_register_device(struct nfc_hci_dev *hdev) { - struct device *dev = &hdev->ndev->dev; - const char *devname = dev_name(dev); - char name[32]; - int r = 0; - mutex_init(&hdev->msg_tx_mutex); INIT_LIST_HEAD(&hdev->msg_tx_queue); INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - snprintf(name, sizeof(name), "%s_hci_msg_tx_wq", devname); - hdev->msg_tx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (hdev->msg_tx_wq == NULL) { - r = -ENOMEM; - goto exit; - } init_timer(&hdev->cmd_timer); hdev->cmd_timer.data = (unsigned long)hdev; @@ -684,27 +672,10 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nfc_hci_msg_rx_work); - snprintf(name, sizeof(name), "%s_hci_msg_rx_wq", devname); - hdev->msg_rx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (hdev->msg_rx_wq == NULL) { - r = -ENOMEM; - goto exit; - } skb_queue_head_init(&hdev->msg_rx_queue); - r = nfc_register_device(hdev->ndev); - -exit: - if (r < 0) { - if (hdev->msg_tx_wq) - destroy_workqueue(hdev->msg_tx_wq); - if (hdev->msg_rx_wq) - destroy_workqueue(hdev->msg_rx_wq); - } - - return r; + return nfc_register_device(hdev->ndev); } EXPORT_SYMBOL(nfc_hci_register_device); @@ -725,9 +696,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) nfc_unregister_device(hdev->ndev); - destroy_workqueue(hdev->msg_tx_wq); - - destroy_workqueue(hdev->msg_rx_wq); + cancel_work_sync(&hdev->msg_tx_work); + cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); @@ -827,7 +797,7 @@ void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); } else { skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); - queue_work(hdev->msg_rx_wq, &hdev->msg_rx_work); + queue_work(system_nrt_wq, &hdev->msg_rx_work); } } EXPORT_SYMBOL(nfc_hci_recv_frame); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index f4dad1a89740..2372b558abe9 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -108,7 +108,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + queue_work(system_nrt_wq, &hdev->msg_tx_work); return 0; diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 6f840c18c892..39b51eacc391 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -540,7 +540,7 @@ static void nfc_shdlc_connect_timeout(unsigned long data) pr_debug("\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } static void nfc_shdlc_t1_timeout(unsigned long data) @@ -549,7 +549,7 @@ static void nfc_shdlc_t1_timeout(unsigned long data) pr_debug("SoftIRQ: need to send ack\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } static void nfc_shdlc_t2_timeout(unsigned long data) @@ -558,7 +558,7 @@ static void nfc_shdlc_t2_timeout(unsigned long data) pr_debug("SoftIRQ: need to retransmit\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } static void nfc_shdlc_sm_work(struct work_struct *work) @@ -598,7 +598,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work) case SHDLC_NEGOCIATING: if (timer_pending(&shdlc->connect_timer) == 0) { shdlc->state = SHDLC_CONNECTING; - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } nfc_shdlc_handle_rcv_queue(shdlc); @@ -662,7 +662,7 @@ static int nfc_shdlc_connect(struct nfc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); wait_event(connect_wq, shdlc->connect_result != 1); @@ -679,7 +679,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } /* @@ -697,7 +697,7 @@ void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb) skb_queue_tail(&shdlc->rcv_q, skb); } - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); } EXPORT_SYMBOL(nfc_shdlc_recv_frame); @@ -754,7 +754,7 @@ static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) skb_queue_tail(&shdlc->send_q, skb); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + queue_work(system_nrt_wq, &shdlc->sm_work); return 0; } @@ -843,7 +843,6 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, { struct nfc_shdlc *shdlc; int r; - char name[32]; if (ops->xmit == NULL) return NULL; @@ -876,11 +875,6 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, skb_queue_head_init(&shdlc->ack_pending_q); INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work); - snprintf(name, sizeof(name), "%s_shdlc_sm_wq", devname); - shdlc->sm_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (shdlc->sm_wq == NULL) - goto err_allocwq; shdlc->client_headroom = tx_headroom; shdlc->client_tailroom = tx_tailroom; @@ -904,9 +898,6 @@ err_regdev: nfc_hci_free_device(shdlc->hdev); err_allocdev: - destroy_workqueue(shdlc->sm_wq); - -err_allocwq: kfree(shdlc); return NULL; @@ -920,7 +911,7 @@ void nfc_shdlc_free(struct nfc_shdlc *shdlc) nfc_hci_unregister_device(shdlc->hdev); nfc_hci_free_device(shdlc->hdev); - destroy_workqueue(shdlc->sm_wq); + cancel_work_sync(&shdlc->sm_work); skb_queue_purge(&shdlc->rcv_q); skb_queue_purge(&shdlc->send_q); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 82f0f7588b46..6f368412ffd2 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -114,9 +114,9 @@ static void local_release(struct kref *ref) nfc_llcp_socket_release(local, false); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); - destroy_workqueue(local->tx_wq); - destroy_workqueue(local->rx_wq); - destroy_workqueue(local->timeout_wq); + cancel_work_sync(&local->tx_work); + cancel_work_sync(&local->rx_work); + cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); kfree(local); } @@ -181,7 +181,7 @@ static void nfc_llcp_symm_timer(unsigned long data) pr_err("SYMM timeout\n"); - queue_work(local->timeout_wq, &local->timeout_work); + queue_work(system_nrt_wq, &local->timeout_work); } struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) @@ -1052,7 +1052,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) } - queue_work(local->tx_wq, &local->tx_work); + queue_work(system_nrt_wq, &local->tx_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; @@ -1071,7 +1071,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(local->rx_wq, &local->rx_work); + queue_work(system_nrt_wq, &local->rx_work); return; } @@ -1086,7 +1086,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(local->rx_wq, &local->rx_work); + queue_work(system_nrt_wq, &local->rx_work); return 0; } @@ -1121,7 +1121,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, if (rf_mode == NFC_RF_INITIATOR) { pr_debug("Queueing Tx work\n"); - queue_work(local->tx_wq, &local->tx_work); + queue_work(system_nrt_wq, &local->tx_work); } else { mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); @@ -1130,10 +1130,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, int nfc_llcp_register_device(struct nfc_dev *ndev) { - struct device *dev = &ndev->dev; struct nfc_llcp_local *local; - char name[32]; - int err; local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL); if (local == NULL) @@ -1149,38 +1146,11 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); - snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev)); - local->tx_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->tx_wq == NULL) { - err = -ENOMEM; - goto err_local; - } local->rx_pending = NULL; INIT_WORK(&local->rx_work, nfc_llcp_rx_work); - snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev)); - local->rx_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->rx_wq == NULL) { - err = -ENOMEM; - goto err_tx_wq; - } INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work); - snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev)); - local->timeout_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->timeout_wq == NULL) { - err = -ENOMEM; - goto err_rx_wq; - } local->sockets.lock = __RW_LOCK_UNLOCKED(local->sockets.lock); local->connecting_sockets.lock = __RW_LOCK_UNLOCKED(local->connecting_sockets.lock); @@ -1192,17 +1162,6 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) list_add(&llcp_devices, &local->list); - return 0; - -err_rx_wq: - destroy_workqueue(local->rx_wq); - -err_tx_wq: - destroy_workqueue(local->tx_wq); - -err_local: - kfree(local); - return 0; } diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 83b8bba5a280..af395c9ceb03 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -56,12 +56,9 @@ struct nfc_llcp_local { struct timer_list link_timer; struct sk_buff_head tx_queue; - struct workqueue_struct *tx_wq; struct work_struct tx_work; - struct workqueue_struct *rx_wq; struct work_struct rx_work; struct sk_buff *rx_pending; - struct workqueue_struct *timeout_wq; struct work_struct timeout_work; u32 target_idx; -- cgit v1.2.1 From 33e5971358c37851137b264f815977507c016fac Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Aug 2012 21:02:40 +0800 Subject: NFC: Remove pointless conditional before HCI kfree_skb() Signed-off-by: Wei Yongjun Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 3 +-- net/nfc/hci/shdlc.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 03646beb3a73..15744c01bddc 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -398,8 +398,7 @@ disconnect_all: nfc_hci_disconnect_all_gates(hdev); exit: - if (skb) - kfree_skb(skb); + kfree_skb(skb); return r; } diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 39b51eacc391..824fb09384ed 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -241,8 +241,7 @@ static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc, } exit: - if (skb) - kfree_skb(skb); + kfree_skb(skb); } static void nfc_shdlc_rcv_ack(struct nfc_shdlc *shdlc, int y_nr) -- cgit v1.2.1 From 52da2449e10039d3bb04c598d24cb1a34530b716 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Sep 2012 21:21:46 +0800 Subject: NFC: Fix possible LLCP memory leak nfc_llcp_build_tlv() malloced the memory and should be free in nfc_llcp_build_gb() after used, and the same in the error handling case, otherwise it will cause memory leak. spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 6f368412ffd2..90ef4a176819 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -426,6 +426,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) u8 *miux_tlv, miux_length; __be16 miux; u8 gb_len = 0; + int ret = 0; version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, @@ -450,8 +451,8 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) gb_len += ARRAY_SIZE(llcp_magic); if (gb_len > NFC_MAX_GT_LEN) { - kfree(version_tlv); - return -EINVAL; + ret = -EINVAL; + goto out; } gb_cur = local->gb; @@ -471,12 +472,15 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) memcpy(gb_cur, miux_tlv, miux_length); gb_cur += miux_length; + local->gb_len = gb_len; + +out: kfree(version_tlv); kfree(lto_tlv); + kfree(wks_tlv); + kfree(miux_tlv); - local->gb_len = gb_len; - - return 0; + return ret; } u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) -- cgit v1.2.1 From ade672082dd35aaaf7c8630d16c9f795c30459c4 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Fri, 7 Sep 2012 11:08:29 +0200 Subject: NFC: Remove crc generation from shdlc layer Checksum is specific for a chip spcification and it varies (in size and type) between different hardware. It should be handled in the driver then. Moreover, shdlc spec doesn't mention crc as a part of the frame. Update pn544_hci driver as well. Signed-off-by: Waldemar Rymarkiewicz Acked-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/shdlc.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 824fb09384ed..ed8796b78f39 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -30,7 +29,6 @@ #include #define SHDLC_LLC_HEAD_ROOM 2 -#define SHDLC_LLC_TAIL_ROOM 2 #define SHDLC_MAX_WINDOW 4 #define SHDLC_SREJ_SUPPORT false @@ -94,28 +92,13 @@ static struct sk_buff *nfc_shdlc_alloc_skb(struct nfc_shdlc *shdlc, struct sk_buff *skb; skb = alloc_skb(shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM + - shdlc->client_tailroom + SHDLC_LLC_TAIL_ROOM + - payload_len, GFP_KERNEL); + shdlc->client_tailroom + payload_len, GFP_KERNEL); if (skb) skb_reserve(skb, shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM); return skb; } -static void nfc_shdlc_add_len_crc(struct sk_buff *skb) -{ - u16 crc; - int len; - - len = skb->len + 2; - *skb_push(skb, 1) = len; - - crc = crc_ccitt(0xffff, skb->data, skb->len); - crc = ~crc; - *skb_put(skb, 1) = crc & 0xff; - *skb_put(skb, 1) = crc >> 8; -} - /* immediately sends an S frame. */ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, enum sframe_type sframe_type, int nr) @@ -131,8 +114,6 @@ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; - nfc_shdlc_add_len_crc(skb); - r = shdlc->ops->xmit(shdlc, skb); kfree_skb(skb); @@ -151,8 +132,6 @@ static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc, *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; - nfc_shdlc_add_len_crc(skb); - r = shdlc->ops->xmit(shdlc, skb); kfree_skb(skb); @@ -509,8 +488,6 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) shdlc->nr); /* SHDLC_DUMP_SKB("shdlc frame written", skb); */ - nfc_shdlc_add_len_crc(skb); - r = shdlc->ops->xmit(shdlc, skb); if (r < 0) { shdlc->hard_fault = r; @@ -880,7 +857,7 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, shdlc->hdev = nfc_hci_allocate_device(&shdlc_ops, init_data, protocols, tx_headroom + SHDLC_LLC_HEAD_ROOM, - tx_tailroom + SHDLC_LLC_TAIL_ROOM, + tx_tailroom, max_link_payload); if (shdlc->hdev == NULL) goto err_allocdev; -- cgit v1.2.1 From c1be211727467882e0485ab062e712a3c1fba840 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Fri, 7 Sep 2012 11:08:30 +0200 Subject: NFC: Correct outgoing frame before requeueing Driver must handle its data added to the frame, so at this point removeing control field of shdlc frame is enough. Signed-off-by: Waldemar Rymarkiewicz Acked-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/shdlc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index ed8796b78f39..9357ba7362f6 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -240,8 +240,7 @@ static void nfc_shdlc_requeue_ack_pending(struct nfc_shdlc *shdlc) pr_debug("ns reset to %d\n", shdlc->dnr); while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) { - skb_pull(skb, 2); /* remove len+control */ - skb_trim(skb, skb->len - 2); /* remove crc */ + skb_pull(skb, 1); /* remove control field */ skb_queue_head(&shdlc->send_q, skb); } shdlc->ns = shdlc->dnr; -- cgit v1.2.1 From b5faa648faf974b58e5a79eafa9a97e1deed7a8a Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 11 Sep 2012 10:41:41 +0200 Subject: NFC: Changed the HCI cmd execution callback prototype Make it match the data_exchange_cb_t so that it can be used directly in the implementation of an asynchronous hci_transceive Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/command.c | 11 ++++++++--- net/nfc/hci/core.c | 15 +++++++-------- net/nfc/hci/hci.h | 15 ++++----------- net/nfc/hci/hcp.c | 4 ++-- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 46362ef979db..15e21093c7a5 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -28,10 +28,15 @@ #include "hci.h" -static void nfc_hci_execute_cb(struct nfc_hci_dev *hdev, int err, - struct sk_buff *skb, void *cb_data) +/* + * HCI command execution completion callback. + * err will be a standard linux error (may be converted from HCI response) + * skb contains the response data and must be disposed, or may be NULL if + * an error occured + */ +static void nfc_hci_execute_cb(void *context, struct sk_buff *skb, int err) { - struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)cb_data; + struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)context; pr_debug("HCI Cmd completed with result=%d\n", err); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 15744c01bddc..e387c86e0cc7 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -57,12 +57,11 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { if (hdev->cmd_pending_msg->cb) - hdev->cmd_pending_msg->cb(hdev, - -ETIME, - NULL, - hdev-> + hdev->cmd_pending_msg->cb(hdev-> cmd_pending_msg-> - cb_context); + cb_context, + NULL, + -ETIME); kfree(hdev->cmd_pending_msg); hdev->cmd_pending_msg = NULL; } else @@ -83,7 +82,7 @@ next_msg: kfree_skb(skb); skb_queue_purge(&msg->msg_frags); if (msg->cb) - msg->cb(hdev, r, NULL, msg->cb_context); + msg->cb(msg->cb_context, NULL, r); kfree(msg); break; } @@ -133,8 +132,8 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err, del_timer_sync(&hdev->cmd_timer); if (hdev->cmd_pending_msg->cb) - hdev->cmd_pending_msg->cb(hdev, err, skb, - hdev->cmd_pending_msg->cb_context); + hdev->cmd_pending_msg->cb(hdev->cmd_pending_msg->cb_context, + skb, err); else kfree_skb(skb); diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index fa9a21e92239..b274d12c18ac 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -20,6 +20,8 @@ #ifndef __LOCAL_HCI_H #define __LOCAL_HCI_H +#include + struct gate_pipe_map { u8 gate; u8 pipe; @@ -35,15 +37,6 @@ struct hcp_packet { struct hcp_message message; } __packed; -/* - * HCI command execution completion callback. - * result will be a standard linux error (may be converted from HCI response) - * skb contains the response data and must be disposed, or may be NULL if - * an error occured - */ -typedef void (*hci_cmd_cb_t) (struct nfc_hci_dev *hdev, int result, - struct sk_buff *skb, void *cb_data); - struct hcp_exec_waiter { wait_queue_head_t *wq; bool exec_complete; @@ -55,7 +48,7 @@ struct hci_msg { struct list_head msg_l; struct sk_buff_head msg_frags; bool wait_response; - hci_cmd_cb_t cb; + data_exchange_cb_t cb; void *cb_context; unsigned long completion_delay; }; @@ -83,7 +76,7 @@ struct hci_create_pipe_resp { int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, const u8 *payload, size_t payload_len, - hci_cmd_cb_t cb, void *cb_data, + data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay); u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index 2372b558abe9..208eedd07ee3 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -35,7 +35,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, const u8 *payload, size_t payload_len, - hci_cmd_cb_t cb, void *cb_data, + data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay) { struct nfc_dev *ndev = hdev->ndev; @@ -52,7 +52,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, skb_queue_head_init(&cmd->msg_frags); cmd->wait_response = (type == NFC_HCI_HCP_COMMAND) ? true : false; cmd->cb = cb; - cmd->cb_context = cb_data; + cmd->cb_context = cb_context; cmd->completion_delay = completion_delay; hci_len = payload_len + 1; -- cgit v1.2.1 From e4c4789e55327e5f2bd6cafcccd46f9b6251bbc3 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 11 Sep 2012 10:42:54 +0200 Subject: NFC: Add a public nfc_hci_send_cmd_async method This method initiates execution of an HCI cmd. Result will be delivered through an asynchronous callback. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/command.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 15e21093c7a5..71c6a7086b8f 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -28,6 +28,20 @@ #include "hci.h" +static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + const u8 *param, size_t param_len, + data_exchange_cb_t cb, void *cb_context) +{ + pr_debug("exec cmd async through pipe=%d, cmd=%d, plen=%zd\n", pipe, + cmd, param_len); + + /* TODO: Define hci cmd execution delay. Should it be the same + * for all commands? + */ + return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd, + param, param_len, cb, cb_context, 3000); +} + /* * HCI command execution completion callback. * err will be a standard linux error (may be converted from HCI response) @@ -60,7 +74,8 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, hcp_ew.exec_complete = false; hcp_ew.result_skb = NULL; - pr_debug("through pipe=%d, cmd=%d, plen=%zd\n", pipe, cmd, param_len); + pr_debug("exec cmd sync through pipe=%d, cmd=%d, plen=%zd\n", pipe, + cmd, param_len); /* TODO: Define hci cmd execution delay. Should it be the same * for all commands? @@ -138,6 +153,23 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, } EXPORT_SYMBOL(nfc_hci_send_cmd); +int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + data_exchange_cb_t cb, void *cb_context) +{ + u8 pipe; + + pr_debug("\n"); + + pipe = hdev->gate2pipe[gate]; + if (pipe == NFC_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nfc_hci_execute_cmd_async(hdev, pipe, cmd, param, param_len, + cb, cb_context); +} +EXPORT_SYMBOL(nfc_hci_send_cmd_async); + int nfc_hci_set_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx, const u8 *param, size_t param_len) { -- cgit v1.2.1 From f3e8fb552789f4845e60b11c47b676d14b9488e5 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 11 Sep 2012 10:43:50 +0200 Subject: NFC: Modified hci_transceive to become an asynchronous operation This enables the completion callback to be called from a different context, preventing a possible deadlock if the callback resulted in the invocation of a nested call to the currently locked nfc_dev. This is also more in line with the im_transceive nfc_ops for NFC Core or NCI drivers which already behave asynchronously. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 57 ++++++++++++++++++++++++++++++++++++----------------- net/nfc/hci/shdlc.c | 5 +++-- 2 files changed, 42 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e387c86e0cc7..dc57e3dc15a4 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -537,13 +537,37 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev, { } +#define HCI_CB_TYPE_TRANSCEIVE 1 + +static void hci_transceive_cb(void *context, struct sk_buff *skb, int err) +{ + struct nfc_hci_dev *hdev = context; + + switch (hdev->async_cb_type) { + case HCI_CB_TYPE_TRANSCEIVE: + /* + * TODO: Check RF Error indicator to make sure data is valid. + * It seems that HCI cmd can complete without error, but data + * can be invalid if an RF error occured? Ignore for now. + */ + if (err == 0) + skb_trim(skb, skb->len - 1); /* RF Err ind */ + + hdev->async_cb(hdev->async_cb_context, skb, err); + break; + default: + if (err == 0) + kfree_skb(skb); + break; + } +} + static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); int r; - struct sk_buff *res_skb = NULL; pr_debug("target_idx=%d\n", target->idx); @@ -551,40 +575,37 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, case NFC_HCI_RF_READER_A_GATE: case NFC_HCI_RF_READER_B_GATE: if (hdev->ops->data_exchange) { - r = hdev->ops->data_exchange(hdev, target, skb, - &res_skb); + r = hdev->ops->data_exchange(hdev, target, skb, cb, + cb_context); if (r <= 0) /* handled */ break; } *skb_push(skb, 1) = 0; /* CTR, see spec:10.2.2.1 */ - r = nfc_hci_send_cmd(hdev, target->hci_reader_gate, - NFC_HCI_WR_XCHG_DATA, - skb->data, skb->len, &res_skb); - /* - * TODO: Check RF Error indicator to make sure data is valid. - * It seems that HCI cmd can complete without error, but data - * can be invalid if an RF error occured? Ignore for now. - */ - if (r == 0) - skb_trim(res_skb, res_skb->len - 1); /* RF Err ind */ + + hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE; + hdev->async_cb = cb; + hdev->async_cb_context = cb_context; + + r = nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + NFC_HCI_WR_XCHG_DATA, skb->data, + skb->len, hci_transceive_cb, hdev); break; default: if (hdev->ops->data_exchange) { - r = hdev->ops->data_exchange(hdev, target, skb, - &res_skb); + r = hdev->ops->data_exchange(hdev, target, skb, cb, + cb_context); if (r == 1) r = -ENOTSUPP; } else r = -ENOTSUPP; + break; } kfree_skb(skb); - cb(cb_context, res_skb, r); - - return 0; + return r; } static int hci_check_presence(struct nfc_dev *nfc_dev, diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 9357ba7362f6..c63af7d3e859 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -777,12 +777,13 @@ static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev, static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev, struct nfc_target *target, struct sk_buff *skb, - struct sk_buff **res_skb) + data_exchange_cb_t cb, void *cb_context) { struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); if (shdlc->ops->data_exchange) - return shdlc->ops->data_exchange(shdlc, target, skb, res_skb); + return shdlc->ops->data_exchange(shdlc, target, skb, cb, + cb_context); return -EPERM; } -- cgit v1.2.1 From 67cccfe17d1b3da1ed6c79e643c9be95ebde9642 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Thu, 13 Sep 2012 17:10:00 +0200 Subject: NFC: Add an LLC Core layer to HCI The LLC layer manages modules that control the link layer protocol (such as shdlc) between HCI and an HCI driver. The driver must simply specify the required llc when it registers with HCI. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/Makefile | 2 +- net/nfc/hci/core.c | 14 +++++ net/nfc/hci/llc.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/hci/llc.h | 58 ++++++++++++++++++ 4 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 net/nfc/hci/llc.c create mode 100644 net/nfc/hci/llc.h (limited to 'net') diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile index f9c44b2fb065..b44686b581af 100644 --- a/net/nfc/hci/Makefile +++ b/net/nfc/hci/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_NFC_HCI) += hci.o -hci-y := core.o hcp.o command.o +hci-y := core.o hcp.o command.o llc.o hci-$(CONFIG_NFC_SHDLC) += shdlc.o diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index dc57e3dc15a4..069e2d6056e5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -26,6 +26,7 @@ #include #include +#include #include "hci.h" @@ -821,4 +822,17 @@ void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL(nfc_hci_recv_frame); +static int __init nfc_hci_init(void) +{ + return nfc_llc_init(); +} + +static void __exit nfc_hci_exit(void) +{ + nfc_llc_exit(); +} + +module_init(nfc_hci_init); +module_exit(nfc_hci_exit); + MODULE_LICENSE("GPL"); diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c new file mode 100644 index 000000000000..73c42785ce84 --- /dev/null +++ b/net/nfc/hci/llc.c @@ -0,0 +1,168 @@ +/* + * Link Layer Control manager + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include "llc.h" + +static struct list_head llc_engines; + +int nfc_llc_init(void) +{ + INIT_LIST_HEAD(&llc_engines); + + return 0; +} +EXPORT_SYMBOL(nfc_llc_init); + +void nfc_llc_exit(void) +{ + struct nfc_llc_engine *llc_engine, *n; + + list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) { + list_del(&llc_engine->entry); + kfree(llc_engine->name); + kfree(llc_engine); + } +} +EXPORT_SYMBOL(nfc_llc_exit); + +int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) +{ + struct nfc_llc_engine *llc_engine; + + llc_engine = kzalloc(sizeof(struct nfc_llc_engine), GFP_KERNEL); + if (llc_engine == NULL) + return -ENOMEM; + + llc_engine->name = kstrdup(name, GFP_KERNEL); + if (llc_engine->name == NULL) { + kfree(llc_engine); + return -ENOMEM; + } + llc_engine->ops = ops; + + INIT_LIST_HEAD(&llc_engine->entry); + list_add_tail (&llc_engine->entry, &llc_engines); + + return 0; +} +EXPORT_SYMBOL(nfc_llc_register); + +static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) +{ + struct nfc_llc_engine *llc_engine; + + list_for_each_entry(llc_engine, &llc_engines, entry) { + if (strcmp(llc_engine->name, name) == 0) + return llc_engine; + } + + return NULL; +} + +void nfc_llc_unregister(const char *name) +{ + struct nfc_llc_engine *llc_engine; + + llc_engine = nfc_llc_name_to_engine(name); + if (llc_engine == NULL) + return; + + list_del(&llc_engine->entry); + kfree(llc_engine->name); + kfree(llc_engine); +} +EXPORT_SYMBOL(nfc_llc_unregister); + +struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, + xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, llc_failure_t llc_failure) +{ + struct nfc_llc_engine *llc_engine; + struct nfc_llc *llc; + + llc_engine = nfc_llc_name_to_engine(name); + if (llc_engine == NULL) + return NULL; + + llc = kzalloc(sizeof(struct nfc_llc), GFP_KERNEL); + if (llc == NULL) + return NULL; + + llc->data = llc_engine->ops->init(hdev, xmit_to_drv, rcv_to_hci, + tx_headroom, tx_tailroom, + &llc->rx_headroom, &llc->rx_tailroom, + llc_failure); + if (llc->data == NULL) { + kfree(llc); + return NULL; + } + llc->ops = llc_engine->ops; + + return llc; +} +EXPORT_SYMBOL(nfc_llc_allocate); + +void nfc_llc_free(struct nfc_llc *llc) +{ + llc->ops->deinit(llc); + kfree(llc); +} +EXPORT_SYMBOL(nfc_llc_free); + +inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, + int *rx_tailroom) +{ + *rx_headroom = llc->rx_headroom; + *rx_tailroom = llc->rx_tailroom; +} +EXPORT_SYMBOL(nfc_llc_get_rx_head_tail_room); + +inline int nfc_llc_start(struct nfc_llc *llc) +{ + return llc->ops->start(llc); +} +EXPORT_SYMBOL(nfc_llc_start); + +inline int nfc_llc_stop(struct nfc_llc *llc) +{ + return llc->ops->stop(llc); +} +EXPORT_SYMBOL(nfc_llc_stop); + +inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) +{ + llc->ops->rcv_from_drv(llc, skb); +} +EXPORT_SYMBOL(nfc_llc_rcv_from_drv); + +inline int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) +{ + return llc->ops->xmit_from_hci(llc, skb); +} +EXPORT_SYMBOL(nfc_llc_xmit_from_hci); + +inline void *nfc_llc_get_data(struct nfc_llc *llc) +{ + return llc->data; +} +EXPORT_SYMBOL(nfc_llc_get_data); diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h new file mode 100644 index 000000000000..b2c7285b0309 --- /dev/null +++ b/net/nfc/hci/llc.h @@ -0,0 +1,58 @@ +/* + * Link Layer Control manager + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LOCAL_LLC_H_ +#define __LOCAL_LLC_H_ + +#include +#include +#include + +struct nfc_llc_ops { + void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure); + void (*deinit) (struct nfc_llc *llc); + int (*start) (struct nfc_llc *llc); + int (*stop) (struct nfc_llc *llc); + void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb); + int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb); +}; + +struct nfc_llc_engine { + const char *name; + struct nfc_llc_ops *ops; + struct list_head entry; +}; + +struct nfc_llc { + void *data; + struct nfc_llc_ops *ops; + int rx_headroom; + int rx_tailroom; +}; + +void *nfc_llc_get_data(struct nfc_llc *llc); + +int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); +void nfc_llc_unregister(const char *name); + +#endif /* __LOCAL_LLC_H_ */ -- cgit v1.2.1 From 8af00d48dc929442644bf68e9cd3d951d9697296 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Thu, 13 Sep 2012 17:10:48 +0200 Subject: NFC: Add a nop (passthrough) llc module to llc core This is a passthrough llc. It can be used by HCI drivers that don't need link layer control. HCI will then write directly to the driver, and driver will deliver incoming frames directly to HCI without any processing. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/Makefile | 2 +- net/nfc/hci/llc.c | 2 +- net/nfc/hci/llc.h | 2 + net/nfc/hci/llc_nop.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 net/nfc/hci/llc_nop.c (limited to 'net') diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile index b44686b581af..2ec4e5876f6b 100644 --- a/net/nfc/hci/Makefile +++ b/net/nfc/hci/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_NFC_HCI) += hci.o -hci-y := core.o hcp.o command.o llc.o +hci-y := core.o hcp.o command.o llc.o llc_nop.o hci-$(CONFIG_NFC_SHDLC) += shdlc.o diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 73c42785ce84..32002e5339c0 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -28,7 +28,7 @@ int nfc_llc_init(void) { INIT_LIST_HEAD(&llc_engines); - return 0; + return nfc_llc_nop_register(); } EXPORT_SYMBOL(nfc_llc_init); diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index b2c7285b0309..acdd8d1bbae5 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -55,4 +55,6 @@ void *nfc_llc_get_data(struct nfc_llc *llc); int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); void nfc_llc_unregister(const char *name); +int nfc_llc_nop_register(void); + #endif /* __LOCAL_LLC_H_ */ diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c new file mode 100644 index 000000000000..ec627cee12cd --- /dev/null +++ b/net/nfc/hci/llc_nop.c @@ -0,0 +1,101 @@ +/* + * nop (passthrough) Link Layer Control + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include + +#include "llc.h" + +struct llc_nop { + struct nfc_hci_dev *hdev; + xmit_to_drv_t xmit_to_drv; + rcv_to_hci_t rcv_to_hci; + int tx_headroom; + int tx_tailroom; + llc_failure_t llc_failure; +}; + +static void *llc_nop_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure) +{ + struct llc_nop *llc_nop; + + *rx_headroom = 0; + *rx_tailroom = 0; + + llc_nop = kzalloc(sizeof(struct llc_nop), GFP_KERNEL); + if (llc_nop == NULL) + return NULL; + + llc_nop->hdev = hdev; + llc_nop->xmit_to_drv = xmit_to_drv; + llc_nop->rcv_to_hci = rcv_to_hci; + llc_nop->tx_headroom = tx_headroom; + llc_nop->tx_tailroom = tx_tailroom; + llc_nop->llc_failure = llc_failure; + + return llc_nop; +} + +static void llc_nop_deinit(struct nfc_llc *llc) +{ + kfree(nfc_llc_get_data(llc)); +} + +static int llc_nop_start(struct nfc_llc *llc) +{ + return 0; +} + +static int llc_nop_stop(struct nfc_llc *llc) +{ + return 0; +} + +static void llc_nop_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_nop *llc_nop = nfc_llc_get_data(llc); + + llc_nop->rcv_to_hci(llc_nop->hdev, skb); +} + +static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_nop *llc_nop = nfc_llc_get_data(llc); + + return llc_nop->xmit_to_drv(llc_nop->hdev, skb); +} + +static struct nfc_llc_ops llc_nop_ops = { + .init = llc_nop_init, + .deinit = llc_nop_deinit, + .start = llc_nop_start, + .stop = llc_nop_stop, + .rcv_from_drv = llc_nop_rcv_from_drv, + .xmit_from_hci = llc_nop_xmit_from_hci, +}; + +int nfc_llc_nop_register() +{ + return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops); +} +EXPORT_SYMBOL(nfc_llc_nop_register); -- cgit v1.2.1 From 4a61cd6687fc6348d08724676d34e38160d6cf9b Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Thu, 13 Sep 2012 17:11:37 +0200 Subject: NFC: Add an shdlc llc module to llc core This is used by HCI drivers such as the one for the pn544 which require communications between HCI and the chip to use shdlc. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/Makefile | 2 +- net/nfc/hci/llc.c | 16 +- net/nfc/hci/llc.h | 1 + net/nfc/hci/llc_shdlc.c | 834 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 851 insertions(+), 2 deletions(-) create mode 100644 net/nfc/hci/llc_shdlc.c (limited to 'net') diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile index 2ec4e5876f6b..c4d65479629b 100644 --- a/net/nfc/hci/Makefile +++ b/net/nfc/hci/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_NFC_HCI) += hci.o hci-y := core.o hcp.o command.o llc.o llc_nop.o -hci-$(CONFIG_NFC_SHDLC) += shdlc.o +hci-$(CONFIG_NFC_SHDLC) += shdlc.o llc_shdlc.o diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 32002e5339c0..bd11b0f7658a 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -26,9 +26,23 @@ static struct list_head llc_engines; int nfc_llc_init(void) { + int r; + INIT_LIST_HEAD(&llc_engines); - return nfc_llc_nop_register(); + r = nfc_llc_nop_register(); + if (r) + goto exit; + + r = nfc_llc_shdlc_register(); + if (r) + goto exit; + + return 0; + +exit: + nfc_llc_exit(); + return r; } EXPORT_SYMBOL(nfc_llc_init); diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index acdd8d1bbae5..c7014fdfc8c9 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -56,5 +56,6 @@ int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); +int nfc_llc_shdlc_register(void); #endif /* __LOCAL_LLC_H_ */ diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c new file mode 100644 index 000000000000..bb191100ee96 --- /dev/null +++ b/net/nfc/hci/llc_shdlc.c @@ -0,0 +1,834 @@ +/* + * shdlc Link Layer Control + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define pr_fmt(fmt) "shdlc: %s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include + +#include "llc.h" + +enum shdlc_state { + SHDLC_DISCONNECTED = 0, + SHDLC_CONNECTING = 1, + SHDLC_NEGOCIATING = 2, + SHDLC_CONNECTED = 3 +}; + +struct llc_shdlc { + struct nfc_hci_dev *hdev; + xmit_to_drv_t xmit_to_drv; + rcv_to_hci_t rcv_to_hci; + + struct mutex state_mutex; + enum shdlc_state state; + int hard_fault; + + wait_queue_head_t *connect_wq; + int connect_tries; + int connect_result; + struct timer_list connect_timer;/* aka T3 in spec 10.6.1 */ + + u8 w; /* window size */ + bool srej_support; + + struct timer_list t1_timer; /* send ack timeout */ + bool t1_active; + + struct timer_list t2_timer; /* guard/retransmit timeout */ + bool t2_active; + + int ns; /* next seq num for send */ + int nr; /* next expected seq num for receive */ + int dnr; /* oldest sent unacked seq num */ + + struct sk_buff_head rcv_q; + + struct sk_buff_head send_q; + bool rnr; /* other side is not ready to receive */ + + struct sk_buff_head ack_pending_q; + + struct work_struct sm_work; + + int tx_headroom; + int tx_tailroom; + + llc_failure_t llc_failure; +}; + +#define SHDLC_LLC_HEAD_ROOM 2 + +#define SHDLC_MAX_WINDOW 4 +#define SHDLC_SREJ_SUPPORT false + +#define SHDLC_CONTROL_HEAD_MASK 0xe0 +#define SHDLC_CONTROL_HEAD_I 0x80 +#define SHDLC_CONTROL_HEAD_I2 0xa0 +#define SHDLC_CONTROL_HEAD_S 0xc0 +#define SHDLC_CONTROL_HEAD_U 0xe0 + +#define SHDLC_CONTROL_NS_MASK 0x38 +#define SHDLC_CONTROL_NR_MASK 0x07 +#define SHDLC_CONTROL_TYPE_MASK 0x18 + +#define SHDLC_CONTROL_M_MASK 0x1f + +enum sframe_type { + S_FRAME_RR = 0x00, + S_FRAME_REJ = 0x01, + S_FRAME_RNR = 0x02, + S_FRAME_SREJ = 0x03 +}; + +enum uframe_modifier { + U_FRAME_UA = 0x06, + U_FRAME_RSET = 0x19 +}; + +#define SHDLC_CONNECT_VALUE_MS 5 +#define SHDLC_T1_VALUE_MS(w) ((5 * w) / 4) +#define SHDLC_T2_VALUE_MS 300 + +#define SHDLC_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "shdlc: ", DUMP_PREFIX_OFFSET, \ + 16, 1, skb->data, skb->len, 0); \ +} while (0) + +/* checks x < y <= z modulo 8 */ +static bool llc_shdlc_x_lt_y_lteq_z(int x, int y, int z) +{ + if (x < z) + return ((x < y) && (y <= z)) ? true : false; + else + return ((y > x) || (y <= z)) ? true : false; +} + +/* checks x <= y < z modulo 8 */ +static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) +{ + if (x <= z) + return ((x <= y) && (y < z)) ? true : false; + else /* x > z -> z+8 > x */ + return ((y >= x) || (y < z)) ? true : false; +} + +static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, + int payload_len) +{ + struct sk_buff *skb; + + skb = alloc_skb(shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM + + shdlc->tx_tailroom + payload_len, GFP_KERNEL); + if (skb) + skb_reserve(skb, shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM); + + return skb; +} + +/* immediately sends an S frame. */ +static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, + enum sframe_type sframe_type, int nr) +{ + int r; + struct sk_buff *skb; + + pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr); + + skb = llc_shdlc_alloc_skb(shdlc, 0); + if (skb == NULL) + return -ENOMEM; + + *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; + + r = shdlc->xmit_to_drv(shdlc->hdev, skb); + + kfree_skb(skb); + + return r; +} + +/* immediately sends an U frame. skb may contain optional payload */ +static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, + struct sk_buff *skb, + enum uframe_modifier uframe_modifier) +{ + int r; + + pr_debug("uframe_modifier=%d\n", uframe_modifier); + + *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; + + r = shdlc->xmit_to_drv(shdlc->hdev, skb); + + kfree_skb(skb); + + return r; +} + +/* + * Free ack_pending frames until y_nr - 1, and reset t2 according to + * the remaining oldest ack_pending frame sent time + */ +static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) +{ + struct sk_buff *skb; + int dnr = shdlc->dnr; /* MUST initially be < y_nr */ + + pr_debug("release ack pending up to frame %d excluded\n", y_nr); + + while (dnr != y_nr) { + pr_debug("release ack pending frame %d\n", dnr); + + skb = skb_dequeue(&shdlc->ack_pending_q); + kfree_skb(skb); + + dnr = (dnr + 1) % 8; + } + + if (skb_queue_empty(&shdlc->ack_pending_q)) { + if (shdlc->t2_active) { + del_timer_sync(&shdlc->t2_timer); + shdlc->t2_active = false; + + pr_debug + ("All sent frames acked. Stopped T2(retransmit)\n"); + } + } else { + skb = skb_peek(&shdlc->ack_pending_q); + + mod_timer(&shdlc->t2_timer, *(unsigned long *)skb->cb + + msecs_to_jiffies(SHDLC_T2_VALUE_MS)); + shdlc->t2_active = true; + + pr_debug + ("Start T2(retransmit) for remaining unacked sent frames\n"); + } +} + +/* + * Receive validated frames from lower layer. skb contains HCI payload only. + * Handle according to algorithm at spec:10.8.2 + */ +static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc, + struct sk_buff *skb, int ns, int nr) +{ + int x_ns = ns; + int y_nr = nr; + + pr_debug("recvd I-frame %d, remote waiting frame %d\n", ns, nr); + + if (shdlc->state != SHDLC_CONNECTED) + goto exit; + + if (x_ns != shdlc->nr) { + llc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr); + goto exit; + } + + if (shdlc->t1_active == false) { + shdlc->t1_active = true; + mod_timer(&shdlc->t1_timer, jiffies + + msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w))); + pr_debug("(re)Start T1(send ack)\n"); + } + + if (skb->len) { + shdlc->rcv_to_hci(shdlc->hdev, skb); + skb = NULL; + } + + shdlc->nr = (shdlc->nr + 1) % 8; + + if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { + llc_shdlc_reset_t2(shdlc, y_nr); + + shdlc->dnr = y_nr; + } + +exit: + kfree_skb(skb); +} + +static void llc_shdlc_rcv_ack(struct llc_shdlc *shdlc, int y_nr) +{ + pr_debug("remote acked up to frame %d excluded\n", y_nr); + + if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { + llc_shdlc_reset_t2(shdlc, y_nr); + shdlc->dnr = y_nr; + } +} + +static void llc_shdlc_requeue_ack_pending(struct llc_shdlc *shdlc) +{ + struct sk_buff *skb; + + pr_debug("ns reset to %d\n", shdlc->dnr); + + while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) { + skb_pull(skb, 1); /* remove control field */ + skb_queue_head(&shdlc->send_q, skb); + } + shdlc->ns = shdlc->dnr; +} + +static void llc_shdlc_rcv_rej(struct llc_shdlc *shdlc, int y_nr) +{ + struct sk_buff *skb; + + pr_debug("remote asks retransmition from frame %d\n", y_nr); + + if (llc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) { + if (shdlc->t2_active) { + del_timer_sync(&shdlc->t2_timer); + shdlc->t2_active = false; + pr_debug("Stopped T2(retransmit)\n"); + } + + if (shdlc->dnr != y_nr) { + while ((shdlc->dnr = ((shdlc->dnr + 1) % 8)) != y_nr) { + skb = skb_dequeue(&shdlc->ack_pending_q); + kfree_skb(skb); + } + } + + llc_shdlc_requeue_ack_pending(shdlc); + } +} + +/* See spec RR:10.8.3 REJ:10.8.4 */ +static void llc_shdlc_rcv_s_frame(struct llc_shdlc *shdlc, + enum sframe_type s_frame_type, int nr) +{ + struct sk_buff *skb; + + if (shdlc->state != SHDLC_CONNECTED) + return; + + switch (s_frame_type) { + case S_FRAME_RR: + llc_shdlc_rcv_ack(shdlc, nr); + if (shdlc->rnr == true) { /* see SHDLC 10.7.7 */ + shdlc->rnr = false; + if (shdlc->send_q.qlen == 0) { + skb = llc_shdlc_alloc_skb(shdlc, 0); + if (skb) + skb_queue_tail(&shdlc->send_q, skb); + } + } + break; + case S_FRAME_REJ: + llc_shdlc_rcv_rej(shdlc, nr); + break; + case S_FRAME_RNR: + llc_shdlc_rcv_ack(shdlc, nr); + shdlc->rnr = true; + break; + default: + break; + } +} + +static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) +{ + pr_debug("result=%d\n", r); + + del_timer_sync(&shdlc->connect_timer); + + if (r == 0) { + shdlc->ns = 0; + shdlc->nr = 0; + shdlc->dnr = 0; + + shdlc->state = SHDLC_CONNECTED; + } else { + shdlc->state = SHDLC_DISCONNECTED; + } + + shdlc->connect_result = r; + + wake_up(shdlc->connect_wq); +} + +static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) +{ + struct sk_buff *skb; + + pr_debug("\n"); + + skb = llc_shdlc_alloc_skb(shdlc, 2); + if (skb == NULL) + return -ENOMEM; + + *skb_put(skb, 1) = SHDLC_MAX_WINDOW; + *skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0; + + return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); +} + +static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) +{ + struct sk_buff *skb; + + pr_debug("\n"); + + skb = llc_shdlc_alloc_skb(shdlc, 0); + if (skb == NULL) + return -ENOMEM; + + return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA); +} + +static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, + struct sk_buff *skb, + enum uframe_modifier u_frame_modifier) +{ + u8 w = SHDLC_MAX_WINDOW; + bool srej_support = SHDLC_SREJ_SUPPORT; + int r; + + pr_debug("u_frame_modifier=%d\n", u_frame_modifier); + + switch (u_frame_modifier) { + case U_FRAME_RSET: + if (shdlc->state == SHDLC_NEGOCIATING) { + /* we sent RSET, but chip wants to negociate */ + if (skb->len > 0) + w = skb->data[0]; + + if (skb->len > 1) + srej_support = skb->data[1] & 0x01 ? true : + false; + + if ((w <= SHDLC_MAX_WINDOW) && + (SHDLC_SREJ_SUPPORT || (srej_support == false))) { + shdlc->w = w; + shdlc->srej_support = srej_support; + r = llc_shdlc_connect_send_ua(shdlc); + llc_shdlc_connect_complete(shdlc, r); + } + } else if (shdlc->state == SHDLC_CONNECTED) { + /* + * Chip wants to reset link. This is unexpected and + * unsupported. + */ + shdlc->hard_fault = -ECONNRESET; + } + break; + case U_FRAME_UA: + if ((shdlc->state == SHDLC_CONNECTING && + shdlc->connect_tries > 0) || + (shdlc->state == SHDLC_NEGOCIATING)) + llc_shdlc_connect_complete(shdlc, 0); + break; + default: + break; + } + + kfree_skb(skb); +} + +static void llc_shdlc_handle_rcv_queue(struct llc_shdlc *shdlc) +{ + struct sk_buff *skb; + u8 control; + int nr; + int ns; + enum sframe_type s_frame_type; + enum uframe_modifier u_frame_modifier; + + if (shdlc->rcv_q.qlen) + pr_debug("rcvQlen=%d\n", shdlc->rcv_q.qlen); + + while ((skb = skb_dequeue(&shdlc->rcv_q)) != NULL) { + control = skb->data[0]; + skb_pull(skb, 1); + switch (control & SHDLC_CONTROL_HEAD_MASK) { + case SHDLC_CONTROL_HEAD_I: + case SHDLC_CONTROL_HEAD_I2: + ns = (control & SHDLC_CONTROL_NS_MASK) >> 3; + nr = control & SHDLC_CONTROL_NR_MASK; + llc_shdlc_rcv_i_frame(shdlc, skb, ns, nr); + break; + case SHDLC_CONTROL_HEAD_S: + s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3; + nr = control & SHDLC_CONTROL_NR_MASK; + llc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr); + kfree_skb(skb); + break; + case SHDLC_CONTROL_HEAD_U: + u_frame_modifier = control & SHDLC_CONTROL_M_MASK; + llc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier); + break; + default: + pr_err("UNKNOWN Control=%d\n", control); + kfree_skb(skb); + break; + } + } +} + +static int llc_shdlc_w_used(int ns, int dnr) +{ + int unack_count; + + if (dnr <= ns) + unack_count = ns - dnr; + else + unack_count = 8 - dnr + ns; + + return unack_count; +} + +/* Send frames according to algorithm at spec:10.8.1 */ +static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) +{ + struct sk_buff *skb; + int r; + unsigned long time_sent; + + if (shdlc->send_q.qlen) + pr_debug + ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", + shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, + shdlc->rnr == false ? "false" : "true", + shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), + shdlc->ack_pending_q.qlen); + + while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w && + (shdlc->rnr == false)) { + + if (shdlc->t1_active) { + del_timer_sync(&shdlc->t1_timer); + shdlc->t1_active = false; + pr_debug("Stopped T1(send ack)\n"); + } + + skb = skb_dequeue(&shdlc->send_q); + + *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) | + shdlc->nr; + + pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns, + shdlc->nr); + /* SHDLC_DUMP_SKB("shdlc frame written", skb); */ + + r = shdlc->xmit_to_drv(shdlc->hdev, skb); + if (r < 0) { + shdlc->hard_fault = r; + break; + } + + shdlc->ns = (shdlc->ns + 1) % 8; + + time_sent = jiffies; + *(unsigned long *)skb->cb = time_sent; + + skb_queue_tail(&shdlc->ack_pending_q, skb); + + if (shdlc->t2_active == false) { + shdlc->t2_active = true; + mod_timer(&shdlc->t2_timer, time_sent + + msecs_to_jiffies(SHDLC_T2_VALUE_MS)); + pr_debug("Started T2 (retransmit)\n"); + } + } +} + +static void llc_shdlc_connect_timeout(unsigned long data) +{ + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + + pr_debug("\n"); + + queue_work(system_nrt_wq, &shdlc->sm_work); +} + +static void llc_shdlc_t1_timeout(unsigned long data) +{ + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + + pr_debug("SoftIRQ: need to send ack\n"); + + queue_work(system_nrt_wq, &shdlc->sm_work); +} + +static void llc_shdlc_t2_timeout(unsigned long data) +{ + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + + pr_debug("SoftIRQ: need to retransmit\n"); + + queue_work(system_nrt_wq, &shdlc->sm_work); +} + +static void llc_shdlc_sm_work(struct work_struct *work) +{ + struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work); + int r; + + pr_debug("\n"); + + mutex_lock(&shdlc->state_mutex); + + switch (shdlc->state) { + case SHDLC_DISCONNECTED: + skb_queue_purge(&shdlc->rcv_q); + skb_queue_purge(&shdlc->send_q); + skb_queue_purge(&shdlc->ack_pending_q); + break; + case SHDLC_CONNECTING: + if (shdlc->hard_fault) { + llc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } + + if (shdlc->connect_tries++ < 5) + r = llc_shdlc_connect_initiate(shdlc); + else + r = -ETIME; + if (r < 0) + llc_shdlc_connect_complete(shdlc, r); + else { + mod_timer(&shdlc->connect_timer, jiffies + + msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS)); + + shdlc->state = SHDLC_NEGOCIATING; + } + break; + case SHDLC_NEGOCIATING: + if (timer_pending(&shdlc->connect_timer) == 0) { + shdlc->state = SHDLC_CONNECTING; + queue_work(system_nrt_wq, &shdlc->sm_work); + } + + llc_shdlc_handle_rcv_queue(shdlc); + + if (shdlc->hard_fault) { + llc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } + break; + case SHDLC_CONNECTED: + llc_shdlc_handle_rcv_queue(shdlc); + llc_shdlc_handle_send_queue(shdlc); + + if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) { + pr_debug + ("Handle T1(send ack) elapsed (T1 now inactive)\n"); + + shdlc->t1_active = false; + r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR, + shdlc->nr); + if (r < 0) + shdlc->hard_fault = r; + } + + if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) { + pr_debug + ("Handle T2(retransmit) elapsed (T2 inactive)\n"); + + shdlc->t2_active = false; + + llc_shdlc_requeue_ack_pending(shdlc); + llc_shdlc_handle_send_queue(shdlc); + } + + if (shdlc->hard_fault) { + shdlc->llc_failure(shdlc->hdev, shdlc->hard_fault); + } + break; + default: + break; + } + mutex_unlock(&shdlc->state_mutex); +} + +/* + * Called from syscall context to establish shdlc link. Sleeps until + * link is ready or failure. + */ +static int llc_shdlc_connect(struct llc_shdlc *shdlc) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); + + pr_debug("\n"); + + mutex_lock(&shdlc->state_mutex); + + shdlc->state = SHDLC_CONNECTING; + shdlc->connect_wq = &connect_wq; + shdlc->connect_tries = 0; + shdlc->connect_result = 1; + + mutex_unlock(&shdlc->state_mutex); + + queue_work(system_nrt_wq, &shdlc->sm_work); + + wait_event(connect_wq, shdlc->connect_result != 1); + + return shdlc->connect_result; +} + +static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) +{ + pr_debug("\n"); + + mutex_lock(&shdlc->state_mutex); + + shdlc->state = SHDLC_DISCONNECTED; + + mutex_unlock(&shdlc->state_mutex); + + queue_work(system_nrt_wq, &shdlc->sm_work); +} + +/* + * Receive an incoming shdlc frame. Frame has already been crc-validated. + * skb contains only LLC header and payload. + * If skb == NULL, it is a notification that the link below is dead. + */ +static void llc_shdlc_recv_frame(struct llc_shdlc *shdlc, struct sk_buff *skb) +{ + if (skb == NULL) { + pr_err("NULL Frame -> link is dead\n"); + shdlc->hard_fault = -EREMOTEIO; + } else { + SHDLC_DUMP_SKB("incoming frame", skb); + skb_queue_tail(&shdlc->rcv_q, skb); + } + + queue_work(system_nrt_wq, &shdlc->sm_work); +} + +static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure) +{ + struct llc_shdlc *shdlc; + + *rx_headroom = SHDLC_LLC_HEAD_ROOM; + *rx_tailroom = 0; + + shdlc = kzalloc(sizeof(struct llc_shdlc), GFP_KERNEL); + if (shdlc == NULL) + return NULL; + + mutex_init(&shdlc->state_mutex); + shdlc->state = SHDLC_DISCONNECTED; + + init_timer(&shdlc->connect_timer); + shdlc->connect_timer.data = (unsigned long)shdlc; + shdlc->connect_timer.function = llc_shdlc_connect_timeout; + + init_timer(&shdlc->t1_timer); + shdlc->t1_timer.data = (unsigned long)shdlc; + shdlc->t1_timer.function = llc_shdlc_t1_timeout; + + init_timer(&shdlc->t2_timer); + shdlc->t2_timer.data = (unsigned long)shdlc; + shdlc->t2_timer.function = llc_shdlc_t2_timeout; + + shdlc->w = SHDLC_MAX_WINDOW; + shdlc->srej_support = SHDLC_SREJ_SUPPORT; + + skb_queue_head_init(&shdlc->rcv_q); + skb_queue_head_init(&shdlc->send_q); + skb_queue_head_init(&shdlc->ack_pending_q); + + INIT_WORK(&shdlc->sm_work, llc_shdlc_sm_work); + + shdlc->hdev = hdev; + shdlc->xmit_to_drv = xmit_to_drv; + shdlc->rcv_to_hci = rcv_to_hci; + shdlc->tx_headroom = tx_headroom; + shdlc->tx_tailroom = tx_tailroom; + shdlc->llc_failure = llc_failure; + + return shdlc; +} + +static void llc_shdlc_deinit(struct nfc_llc *llc) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + skb_queue_purge(&shdlc->rcv_q); + skb_queue_purge(&shdlc->send_q); + skb_queue_purge(&shdlc->ack_pending_q); + + kfree(shdlc); +} + +static int llc_shdlc_start(struct nfc_llc *llc) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + return llc_shdlc_connect(shdlc); +} + +static int llc_shdlc_stop(struct nfc_llc *llc) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + llc_shdlc_disconnect(shdlc); + + return 0; +} + +static void llc_shdlc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + llc_shdlc_recv_frame(shdlc, skb); +} + +static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + skb_queue_tail(&shdlc->send_q, skb); + + queue_work(system_nrt_wq, &shdlc->sm_work); + + return 0; +} + +static struct nfc_llc_ops llc_shdlc_ops = { + .init = llc_shdlc_init, + .deinit = llc_shdlc_deinit, + .start = llc_shdlc_start, + .stop = llc_shdlc_stop, + .rcv_from_drv = llc_shdlc_rcv_from_drv, + .xmit_from_hci = llc_shdlc_xmit_from_hci, +}; + +int nfc_llc_shdlc_register() +{ + return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops); +} +EXPORT_SYMBOL(nfc_llc_shdlc_register); -- cgit v1.2.1 From 412fda538f4b1317ecd0fbe6e5bc9124792bea88 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 18 Sep 2012 19:45:48 +0200 Subject: NFC: Changed HCI and PN544 HCI driver to use the new HCI LLC Core The previous shdlc HCI driver and its header are removed from the tree. PN544 now registers directly with HCI and passes the name of the llc it requires (shdlc). HCI instantiation now allocates the required llc instance. The llc is started when the HCI device is brought up. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/Makefile | 2 +- net/nfc/hci/core.c | 210 ++++++----- net/nfc/hci/llc_shdlc.c | 2 +- net/nfc/hci/shdlc.c | 918 ------------------------------------------------ 4 files changed, 123 insertions(+), 1009 deletions(-) delete mode 100644 net/nfc/hci/shdlc.c (limited to 'net') diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile index c4d65479629b..c5dbb6891b24 100644 --- a/net/nfc/hci/Makefile +++ b/net/nfc/hci/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_NFC_HCI) += hci.o hci-y := core.o hcp.o command.o llc.o llc_nop.o -hci-$(CONFIG_NFC_SHDLC) += shdlc.o llc_shdlc.o +hci-$(CONFIG_NFC_SHDLC) += llc_shdlc.o diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 069e2d6056e5..c1129c22d835 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -78,7 +78,7 @@ next_msg: pr_debug("msg_tx_queue has a cmd to send\n"); while ((skb = skb_dequeue(&msg->msg_frags)) != NULL) { - r = hdev->ops->xmit(hdev, skb); + r = nfc_llc_xmit_from_hci(hdev->llc, skb); if (r < 0) { kfree_skb(skb); skb_queue_purge(&msg->msg_frags); @@ -469,29 +469,38 @@ static int hci_dev_up(struct nfc_dev *nfc_dev) return r; } + r = nfc_llc_start(hdev->llc); + if (r < 0) + goto exit_close; + r = hci_dev_session_init(hdev); if (r < 0) - goto exit; + goto exit_llc; r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_END_OPERATION, NULL, 0); if (r < 0) - goto exit; + goto exit_llc; if (hdev->ops->hci_ready) { r = hdev->ops->hci_ready(hdev); if (r < 0) - goto exit; + goto exit_llc; } r = hci_dev_version(hdev); if (r < 0) - goto exit; + goto exit_llc; + + return 0; + +exit_llc: + nfc_llc_stop(hdev->llc); + +exit_close: + if (hdev->ops->close) + hdev->ops->close(hdev); -exit: - if (r < 0) - if (hdev->ops->close) - hdev->ops->close(hdev); return r; } @@ -499,6 +508,8 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + nfc_llc_stop(hdev->llc); + if (hdev->ops->close) hdev->ops->close(hdev); @@ -620,6 +631,93 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, return 0; } +static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) +{ + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg == NULL) { + nfc_driver_failure(hdev->ndev, err); + goto exit; + } + + __nfc_hci_cmd_completion(hdev, err, NULL); + +exit: + mutex_unlock(&hdev->msg_tx_mutex); +} + +static void nfc_hci_llc_failure(struct nfc_hci_dev *hdev, int err) +{ + nfc_hci_failure(hdev, err); +} + +static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + struct hcp_packet *packet; + u8 type; + u8 instruction; + struct sk_buff *hcp_skb; + u8 pipe; + struct sk_buff *frag_skb; + int msg_len; + + packet = (struct hcp_packet *)skb->data; + if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { + skb_queue_tail(&hdev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? */ + if (skb_queue_len(&hdev->rx_hcp_frags)) { + pipe = packet->header & NFC_HCI_FRAGMENT; + skb_queue_tail(&hdev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NFC_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (hcp_skb == NULL) { + nfc_hci_failure(hdev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), + frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, + msg_len); + } + + skb_queue_purge(&hdev->rx_hcp_frags); + } else { + packet->header &= NFC_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. + */ + packet = (struct hcp_packet *)hcp_skb->data; + type = HCP_MSG_GET_TYPE(packet->message.header); + if (type == NFC_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN + + NFC_HCI_HCP_MESSAGE_HEADER_LEN); + nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); + queue_work(system_nrt_wq, &hdev->msg_rx_work); + } +} + static struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, @@ -634,6 +732,7 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, u32 protocols, + const char *llc_name, int tx_headroom, int tx_tailroom, int max_link_payload) @@ -650,10 +749,19 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, if (hdev == NULL) return NULL; + hdev->llc = nfc_llc_allocate(llc_name, hdev, ops->xmit, + nfc_hci_recv_from_llc, tx_headroom, + tx_tailroom, nfc_hci_llc_failure); + if (hdev->llc == NULL) { + kfree(hdev); + return NULL; + } + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { + nfc_llc_free(hdev->llc); kfree(hdev); return NULL; } @@ -673,6 +781,7 @@ EXPORT_SYMBOL(nfc_hci_allocate_device); void nfc_hci_free_device(struct nfc_hci_dev *hdev) { nfc_free_device(hdev->ndev); + nfc_llc_free(hdev->llc); kfree(hdev); } EXPORT_SYMBOL(nfc_hci_free_device); @@ -733,92 +842,15 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_get_clientdata); -static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) -{ - mutex_lock(&hdev->msg_tx_mutex); - - if (hdev->cmd_pending_msg == NULL) { - nfc_driver_failure(hdev->ndev, err); - goto exit; - } - - __nfc_hci_cmd_completion(hdev, err, NULL); - -exit: - mutex_unlock(&hdev->msg_tx_mutex); -} - void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err) { nfc_hci_failure(hdev, err); } EXPORT_SYMBOL(nfc_hci_driver_failure); -void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) +void inline nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) { - struct hcp_packet *packet; - u8 type; - u8 instruction; - struct sk_buff *hcp_skb; - u8 pipe; - struct sk_buff *frag_skb; - int msg_len; - - packet = (struct hcp_packet *)skb->data; - if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { - skb_queue_tail(&hdev->rx_hcp_frags, skb); - return; - } - - /* it's the last fragment. Does it need re-aggregation? */ - if (skb_queue_len(&hdev->rx_hcp_frags)) { - pipe = packet->header & NFC_HCI_FRAGMENT; - skb_queue_tail(&hdev->rx_hcp_frags, skb); - - msg_len = 0; - skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { - msg_len += (frag_skb->len - - NFC_HCI_HCP_PACKET_HEADER_LEN); - } - - hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN + - msg_len, GFP_KERNEL); - if (hcp_skb == NULL) { - nfc_hci_failure(hdev, -ENOMEM); - return; - } - - *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe; - - skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN; - memcpy(skb_put(hcp_skb, msg_len), - frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, - msg_len); - } - - skb_queue_purge(&hdev->rx_hcp_frags); - } else { - packet->header &= NFC_HCI_FRAGMENT; - hcp_skb = skb; - } - - /* if this is a response, dispatch immediately to - * unblock waiting cmd context. Otherwise, enqueue to dispatch - * in separate context where handler can also execute command. - */ - packet = (struct hcp_packet *)hcp_skb->data; - type = HCP_MSG_GET_TYPE(packet->message.header); - if (type == NFC_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN + - NFC_HCI_HCP_MESSAGE_HEADER_LEN); - nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); - } else { - skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); - queue_work(system_nrt_wq, &hdev->msg_rx_work); - } + nfc_llc_rcv_from_drv(hdev->llc, skb); } EXPORT_SYMBOL(nfc_hci_recv_frame); @@ -832,7 +864,7 @@ static void __exit nfc_hci_exit(void) nfc_llc_exit(); } -module_init(nfc_hci_init); +subsys_initcall(nfc_hci_init); module_exit(nfc_hci_exit); MODULE_LICENSE("GPL"); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index bb191100ee96..fad6cd18d613 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -535,7 +535,7 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns, shdlc->nr); - /* SHDLC_DUMP_SKB("shdlc frame written", skb); */ + SHDLC_DUMP_SKB("shdlc frame written", skb); r = shdlc->xmit_to_drv(shdlc->hdev, skb); if (r < 0) { diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c deleted file mode 100644 index c63af7d3e859..000000000000 --- a/net/nfc/hci/shdlc.c +++ /dev/null @@ -1,918 +0,0 @@ -/* - * Copyright (C) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#define pr_fmt(fmt) "shdlc: %s: " fmt, __func__ - -#include -#include -#include -#include -#include - -#include -#include - -#define SHDLC_LLC_HEAD_ROOM 2 - -#define SHDLC_MAX_WINDOW 4 -#define SHDLC_SREJ_SUPPORT false - -#define SHDLC_CONTROL_HEAD_MASK 0xe0 -#define SHDLC_CONTROL_HEAD_I 0x80 -#define SHDLC_CONTROL_HEAD_I2 0xa0 -#define SHDLC_CONTROL_HEAD_S 0xc0 -#define SHDLC_CONTROL_HEAD_U 0xe0 - -#define SHDLC_CONTROL_NS_MASK 0x38 -#define SHDLC_CONTROL_NR_MASK 0x07 -#define SHDLC_CONTROL_TYPE_MASK 0x18 - -#define SHDLC_CONTROL_M_MASK 0x1f - -enum sframe_type { - S_FRAME_RR = 0x00, - S_FRAME_REJ = 0x01, - S_FRAME_RNR = 0x02, - S_FRAME_SREJ = 0x03 -}; - -enum uframe_modifier { - U_FRAME_UA = 0x06, - U_FRAME_RSET = 0x19 -}; - -#define SHDLC_CONNECT_VALUE_MS 5 -#define SHDLC_T1_VALUE_MS(w) ((5 * w) / 4) -#define SHDLC_T2_VALUE_MS 300 - -#define SHDLC_DUMP_SKB(info, skb) \ -do { \ - pr_debug("%s:\n", info); \ - print_hex_dump(KERN_DEBUG, "shdlc: ", DUMP_PREFIX_OFFSET, \ - 16, 1, skb->data, skb->len, 0); \ -} while (0) - -/* checks x < y <= z modulo 8 */ -static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z) -{ - if (x < z) - return ((x < y) && (y <= z)) ? true : false; - else - return ((y > x) || (y <= z)) ? true : false; -} - -/* checks x <= y < z modulo 8 */ -static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z) -{ - if (x <= z) - return ((x <= y) && (y < z)) ? true : false; - else /* x > z -> z+8 > x */ - return ((y >= x) || (y < z)) ? true : false; -} - -static struct sk_buff *nfc_shdlc_alloc_skb(struct nfc_shdlc *shdlc, - int payload_len) -{ - struct sk_buff *skb; - - skb = alloc_skb(shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM + - shdlc->client_tailroom + payload_len, GFP_KERNEL); - if (skb) - skb_reserve(skb, shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM); - - return skb; -} - -/* immediately sends an S frame. */ -static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, - enum sframe_type sframe_type, int nr) -{ - int r; - struct sk_buff *skb; - - pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr); - - skb = nfc_shdlc_alloc_skb(shdlc, 0); - if (skb == NULL) - return -ENOMEM; - - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; - - r = shdlc->ops->xmit(shdlc, skb); - - kfree_skb(skb); - - return r; -} - -/* immediately sends an U frame. skb may contain optional payload */ -static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc, - struct sk_buff *skb, - enum uframe_modifier uframe_modifier) -{ - int r; - - pr_debug("uframe_modifier=%d\n", uframe_modifier); - - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; - - r = shdlc->ops->xmit(shdlc, skb); - - kfree_skb(skb); - - return r; -} - -/* - * Free ack_pending frames until y_nr - 1, and reset t2 according to - * the remaining oldest ack_pending frame sent time - */ -static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr) -{ - struct sk_buff *skb; - int dnr = shdlc->dnr; /* MUST initially be < y_nr */ - - pr_debug("release ack pending up to frame %d excluded\n", y_nr); - - while (dnr != y_nr) { - pr_debug("release ack pending frame %d\n", dnr); - - skb = skb_dequeue(&shdlc->ack_pending_q); - kfree_skb(skb); - - dnr = (dnr + 1) % 8; - } - - if (skb_queue_empty(&shdlc->ack_pending_q)) { - if (shdlc->t2_active) { - del_timer_sync(&shdlc->t2_timer); - shdlc->t2_active = false; - - pr_debug - ("All sent frames acked. Stopped T2(retransmit)\n"); - } - } else { - skb = skb_peek(&shdlc->ack_pending_q); - - mod_timer(&shdlc->t2_timer, *(unsigned long *)skb->cb + - msecs_to_jiffies(SHDLC_T2_VALUE_MS)); - shdlc->t2_active = true; - - pr_debug - ("Start T2(retransmit) for remaining unacked sent frames\n"); - } -} - -/* - * Receive validated frames from lower layer. skb contains HCI payload only. - * Handle according to algorithm at spec:10.8.2 - */ -static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc, - struct sk_buff *skb, int ns, int nr) -{ - int x_ns = ns; - int y_nr = nr; - - pr_debug("recvd I-frame %d, remote waiting frame %d\n", ns, nr); - - if (shdlc->state != SHDLC_CONNECTED) - goto exit; - - if (x_ns != shdlc->nr) { - nfc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr); - goto exit; - } - - if (shdlc->t1_active == false) { - shdlc->t1_active = true; - mod_timer(&shdlc->t1_timer, - msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w))); - pr_debug("(re)Start T1(send ack)\n"); - } - - if (skb->len) { - nfc_hci_recv_frame(shdlc->hdev, skb); - skb = NULL; - } - - shdlc->nr = (shdlc->nr + 1) % 8; - - if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { - nfc_shdlc_reset_t2(shdlc, y_nr); - - shdlc->dnr = y_nr; - } - -exit: - kfree_skb(skb); -} - -static void nfc_shdlc_rcv_ack(struct nfc_shdlc *shdlc, int y_nr) -{ - pr_debug("remote acked up to frame %d excluded\n", y_nr); - - if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { - nfc_shdlc_reset_t2(shdlc, y_nr); - shdlc->dnr = y_nr; - } -} - -static void nfc_shdlc_requeue_ack_pending(struct nfc_shdlc *shdlc) -{ - struct sk_buff *skb; - - pr_debug("ns reset to %d\n", shdlc->dnr); - - while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) { - skb_pull(skb, 1); /* remove control field */ - skb_queue_head(&shdlc->send_q, skb); - } - shdlc->ns = shdlc->dnr; -} - -static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr) -{ - struct sk_buff *skb; - - pr_debug("remote asks retransmition from frame %d\n", y_nr); - - if (nfc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) { - if (shdlc->t2_active) { - del_timer_sync(&shdlc->t2_timer); - shdlc->t2_active = false; - pr_debug("Stopped T2(retransmit)\n"); - } - - if (shdlc->dnr != y_nr) { - while ((shdlc->dnr = ((shdlc->dnr + 1) % 8)) != y_nr) { - skb = skb_dequeue(&shdlc->ack_pending_q); - kfree_skb(skb); - } - } - - nfc_shdlc_requeue_ack_pending(shdlc); - } -} - -/* See spec RR:10.8.3 REJ:10.8.4 */ -static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc, - enum sframe_type s_frame_type, int nr) -{ - struct sk_buff *skb; - - if (shdlc->state != SHDLC_CONNECTED) - return; - - switch (s_frame_type) { - case S_FRAME_RR: - nfc_shdlc_rcv_ack(shdlc, nr); - if (shdlc->rnr == true) { /* see SHDLC 10.7.7 */ - shdlc->rnr = false; - if (shdlc->send_q.qlen == 0) { - skb = nfc_shdlc_alloc_skb(shdlc, 0); - if (skb) - skb_queue_tail(&shdlc->send_q, skb); - } - } - break; - case S_FRAME_REJ: - nfc_shdlc_rcv_rej(shdlc, nr); - break; - case S_FRAME_RNR: - nfc_shdlc_rcv_ack(shdlc, nr); - shdlc->rnr = true; - break; - default: - break; - } -} - -static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r) -{ - pr_debug("result=%d\n", r); - - del_timer_sync(&shdlc->connect_timer); - - if (r == 0) { - shdlc->ns = 0; - shdlc->nr = 0; - shdlc->dnr = 0; - - shdlc->state = SHDLC_CONNECTED; - } else { - shdlc->state = SHDLC_DISCONNECTED; - } - - shdlc->connect_result = r; - - wake_up(shdlc->connect_wq); -} - -static int nfc_shdlc_connect_initiate(struct nfc_shdlc *shdlc) -{ - struct sk_buff *skb; - - pr_debug("\n"); - - skb = nfc_shdlc_alloc_skb(shdlc, 2); - if (skb == NULL) - return -ENOMEM; - - *skb_put(skb, 1) = SHDLC_MAX_WINDOW; - *skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0; - - return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); -} - -static int nfc_shdlc_connect_send_ua(struct nfc_shdlc *shdlc) -{ - struct sk_buff *skb; - - pr_debug("\n"); - - skb = nfc_shdlc_alloc_skb(shdlc, 0); - if (skb == NULL) - return -ENOMEM; - - return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA); -} - -static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc, - struct sk_buff *skb, - enum uframe_modifier u_frame_modifier) -{ - u8 w = SHDLC_MAX_WINDOW; - bool srej_support = SHDLC_SREJ_SUPPORT; - int r; - - pr_debug("u_frame_modifier=%d\n", u_frame_modifier); - - switch (u_frame_modifier) { - case U_FRAME_RSET: - if (shdlc->state == SHDLC_NEGOCIATING) { - /* we sent RSET, but chip wants to negociate */ - if (skb->len > 0) - w = skb->data[0]; - - if (skb->len > 1) - srej_support = skb->data[1] & 0x01 ? true : - false; - - if ((w <= SHDLC_MAX_WINDOW) && - (SHDLC_SREJ_SUPPORT || (srej_support == false))) { - shdlc->w = w; - shdlc->srej_support = srej_support; - r = nfc_shdlc_connect_send_ua(shdlc); - nfc_shdlc_connect_complete(shdlc, r); - } - } else if (shdlc->state == SHDLC_CONNECTED) { - /* - * Chip wants to reset link. This is unexpected and - * unsupported. - */ - shdlc->hard_fault = -ECONNRESET; - } - break; - case U_FRAME_UA: - if ((shdlc->state == SHDLC_CONNECTING && - shdlc->connect_tries > 0) || - (shdlc->state == SHDLC_NEGOCIATING)) - nfc_shdlc_connect_complete(shdlc, 0); - break; - default: - break; - } - - kfree_skb(skb); -} - -static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc) -{ - struct sk_buff *skb; - u8 control; - int nr; - int ns; - enum sframe_type s_frame_type; - enum uframe_modifier u_frame_modifier; - - if (shdlc->rcv_q.qlen) - pr_debug("rcvQlen=%d\n", shdlc->rcv_q.qlen); - - while ((skb = skb_dequeue(&shdlc->rcv_q)) != NULL) { - control = skb->data[0]; - skb_pull(skb, 1); - switch (control & SHDLC_CONTROL_HEAD_MASK) { - case SHDLC_CONTROL_HEAD_I: - case SHDLC_CONTROL_HEAD_I2: - ns = (control & SHDLC_CONTROL_NS_MASK) >> 3; - nr = control & SHDLC_CONTROL_NR_MASK; - nfc_shdlc_rcv_i_frame(shdlc, skb, ns, nr); - break; - case SHDLC_CONTROL_HEAD_S: - s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3; - nr = control & SHDLC_CONTROL_NR_MASK; - nfc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr); - kfree_skb(skb); - break; - case SHDLC_CONTROL_HEAD_U: - u_frame_modifier = control & SHDLC_CONTROL_M_MASK; - nfc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier); - break; - default: - pr_err("UNKNOWN Control=%d\n", control); - kfree_skb(skb); - break; - } - } -} - -static int nfc_shdlc_w_used(int ns, int dnr) -{ - int unack_count; - - if (dnr <= ns) - unack_count = ns - dnr; - else - unack_count = 8 - dnr + ns; - - return unack_count; -} - -/* Send frames according to algorithm at spec:10.8.1 */ -static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) -{ - struct sk_buff *skb; - int r; - unsigned long time_sent; - - if (shdlc->send_q.qlen) - pr_debug - ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", - shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, - shdlc->rnr == false ? "false" : "true", - shdlc->w - nfc_shdlc_w_used(shdlc->ns, shdlc->dnr), - shdlc->ack_pending_q.qlen); - - while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w && - (shdlc->rnr == false)) { - - if (shdlc->t1_active) { - del_timer_sync(&shdlc->t1_timer); - shdlc->t1_active = false; - pr_debug("Stopped T1(send ack)\n"); - } - - skb = skb_dequeue(&shdlc->send_q); - - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) | - shdlc->nr; - - pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns, - shdlc->nr); - /* SHDLC_DUMP_SKB("shdlc frame written", skb); */ - - r = shdlc->ops->xmit(shdlc, skb); - if (r < 0) { - shdlc->hard_fault = r; - break; - } - - shdlc->ns = (shdlc->ns + 1) % 8; - - time_sent = jiffies; - *(unsigned long *)skb->cb = time_sent; - - skb_queue_tail(&shdlc->ack_pending_q, skb); - - if (shdlc->t2_active == false) { - shdlc->t2_active = true; - mod_timer(&shdlc->t2_timer, time_sent + - msecs_to_jiffies(SHDLC_T2_VALUE_MS)); - pr_debug("Started T2 (retransmit)\n"); - } - } -} - -static void nfc_shdlc_connect_timeout(unsigned long data) -{ - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; - - pr_debug("\n"); - - queue_work(system_nrt_wq, &shdlc->sm_work); -} - -static void nfc_shdlc_t1_timeout(unsigned long data) -{ - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; - - pr_debug("SoftIRQ: need to send ack\n"); - - queue_work(system_nrt_wq, &shdlc->sm_work); -} - -static void nfc_shdlc_t2_timeout(unsigned long data) -{ - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; - - pr_debug("SoftIRQ: need to retransmit\n"); - - queue_work(system_nrt_wq, &shdlc->sm_work); -} - -static void nfc_shdlc_sm_work(struct work_struct *work) -{ - struct nfc_shdlc *shdlc = container_of(work, struct nfc_shdlc, sm_work); - int r; - - pr_debug("\n"); - - mutex_lock(&shdlc->state_mutex); - - switch (shdlc->state) { - case SHDLC_DISCONNECTED: - skb_queue_purge(&shdlc->rcv_q); - skb_queue_purge(&shdlc->send_q); - skb_queue_purge(&shdlc->ack_pending_q); - break; - case SHDLC_CONNECTING: - if (shdlc->hard_fault) { - nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); - break; - } - - if (shdlc->connect_tries++ < 5) - r = nfc_shdlc_connect_initiate(shdlc); - else - r = -ETIME; - if (r < 0) - nfc_shdlc_connect_complete(shdlc, r); - else { - mod_timer(&shdlc->connect_timer, jiffies + - msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS)); - - shdlc->state = SHDLC_NEGOCIATING; - } - break; - case SHDLC_NEGOCIATING: - if (timer_pending(&shdlc->connect_timer) == 0) { - shdlc->state = SHDLC_CONNECTING; - queue_work(system_nrt_wq, &shdlc->sm_work); - } - - nfc_shdlc_handle_rcv_queue(shdlc); - - if (shdlc->hard_fault) { - nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); - break; - } - break; - case SHDLC_CONNECTED: - nfc_shdlc_handle_rcv_queue(shdlc); - nfc_shdlc_handle_send_queue(shdlc); - - if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) { - pr_debug - ("Handle T1(send ack) elapsed (T1 now inactive)\n"); - - shdlc->t1_active = false; - r = nfc_shdlc_send_s_frame(shdlc, S_FRAME_RR, - shdlc->nr); - if (r < 0) - shdlc->hard_fault = r; - } - - if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) { - pr_debug - ("Handle T2(retransmit) elapsed (T2 inactive)\n"); - - shdlc->t2_active = false; - - nfc_shdlc_requeue_ack_pending(shdlc); - nfc_shdlc_handle_send_queue(shdlc); - } - - if (shdlc->hard_fault) { - nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault); - } - break; - default: - break; - } - mutex_unlock(&shdlc->state_mutex); -} - -/* - * Called from syscall context to establish shdlc link. Sleeps until - * link is ready or failure. - */ -static int nfc_shdlc_connect(struct nfc_shdlc *shdlc) -{ - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); - - pr_debug("\n"); - - mutex_lock(&shdlc->state_mutex); - - shdlc->state = SHDLC_CONNECTING; - shdlc->connect_wq = &connect_wq; - shdlc->connect_tries = 0; - shdlc->connect_result = 1; - - mutex_unlock(&shdlc->state_mutex); - - queue_work(system_nrt_wq, &shdlc->sm_work); - - wait_event(connect_wq, shdlc->connect_result != 1); - - return shdlc->connect_result; -} - -static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc) -{ - pr_debug("\n"); - - mutex_lock(&shdlc->state_mutex); - - shdlc->state = SHDLC_DISCONNECTED; - - mutex_unlock(&shdlc->state_mutex); - - queue_work(system_nrt_wq, &shdlc->sm_work); -} - -/* - * Receive an incoming shdlc frame. Frame has already been crc-validated. - * skb contains only LLC header and payload. - * If skb == NULL, it is a notification that the link below is dead. - */ -void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb) -{ - if (skb == NULL) { - pr_err("NULL Frame -> link is dead\n"); - shdlc->hard_fault = -EREMOTEIO; - } else { - SHDLC_DUMP_SKB("incoming frame", skb); - skb_queue_tail(&shdlc->rcv_q, skb); - } - - queue_work(system_nrt_wq, &shdlc->sm_work); -} -EXPORT_SYMBOL(nfc_shdlc_recv_frame); - -static int nfc_shdlc_open(struct nfc_hci_dev *hdev) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - int r; - - pr_debug("\n"); - - if (shdlc->ops->open) { - r = shdlc->ops->open(shdlc); - if (r < 0) - return r; - } - - r = nfc_shdlc_connect(shdlc); - if (r < 0 && shdlc->ops->close) - shdlc->ops->close(shdlc); - - return r; -} - -static void nfc_shdlc_close(struct nfc_hci_dev *hdev) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - nfc_shdlc_disconnect(shdlc); - - if (shdlc->ops->close) - shdlc->ops->close(shdlc); -} - -static int nfc_shdlc_hci_ready(struct nfc_hci_dev *hdev) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - int r = 0; - - pr_debug("\n"); - - if (shdlc->ops->hci_ready) - r = shdlc->ops->hci_ready(shdlc); - - return r; -} - -static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - SHDLC_DUMP_SKB("queuing HCP packet to shdlc", skb); - - skb_queue_tail(&shdlc->send_q, skb); - - queue_work(system_nrt_wq, &shdlc->sm_work); - - return 0; -} - -static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, - u32 im_protocols, u32 tm_protocols) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - if (shdlc->ops->start_poll) - return shdlc->ops->start_poll(shdlc, - im_protocols, tm_protocols); - - return 0; -} - -static int nfc_shdlc_target_from_gate(struct nfc_hci_dev *hdev, u8 gate, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->target_from_gate) - return shdlc->ops->target_from_gate(shdlc, gate, target); - - return -EPERM; -} - -static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev, - u8 gate, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - if (shdlc->ops->complete_target_discovered) - return shdlc->ops->complete_target_discovered(shdlc, gate, - target); - - return 0; -} - -static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev, - struct nfc_target *target, - struct sk_buff *skb, - data_exchange_cb_t cb, void *cb_context) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->data_exchange) - return shdlc->ops->data_exchange(shdlc, target, skb, cb, - cb_context); - - return -EPERM; -} - -static int nfc_shdlc_check_presence(struct nfc_hci_dev *hdev, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->check_presence) - return shdlc->ops->check_presence(shdlc, target); - - return 0; -} - -static struct nfc_hci_ops shdlc_ops = { - .open = nfc_shdlc_open, - .close = nfc_shdlc_close, - .hci_ready = nfc_shdlc_hci_ready, - .xmit = nfc_shdlc_xmit, - .start_poll = nfc_shdlc_start_poll, - .target_from_gate = nfc_shdlc_target_from_gate, - .complete_target_discovered = nfc_shdlc_complete_target_discovered, - .data_exchange = nfc_shdlc_data_exchange, - .check_presence = nfc_shdlc_check_presence, -}; - -struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, - struct nfc_hci_init_data *init_data, - u32 protocols, - int tx_headroom, int tx_tailroom, - int max_link_payload, const char *devname) -{ - struct nfc_shdlc *shdlc; - int r; - - if (ops->xmit == NULL) - return NULL; - - shdlc = kzalloc(sizeof(struct nfc_shdlc), GFP_KERNEL); - if (shdlc == NULL) - return NULL; - - mutex_init(&shdlc->state_mutex); - shdlc->ops = ops; - shdlc->state = SHDLC_DISCONNECTED; - - init_timer(&shdlc->connect_timer); - shdlc->connect_timer.data = (unsigned long)shdlc; - shdlc->connect_timer.function = nfc_shdlc_connect_timeout; - - init_timer(&shdlc->t1_timer); - shdlc->t1_timer.data = (unsigned long)shdlc; - shdlc->t1_timer.function = nfc_shdlc_t1_timeout; - - init_timer(&shdlc->t2_timer); - shdlc->t2_timer.data = (unsigned long)shdlc; - shdlc->t2_timer.function = nfc_shdlc_t2_timeout; - - shdlc->w = SHDLC_MAX_WINDOW; - shdlc->srej_support = SHDLC_SREJ_SUPPORT; - - skb_queue_head_init(&shdlc->rcv_q); - skb_queue_head_init(&shdlc->send_q); - skb_queue_head_init(&shdlc->ack_pending_q); - - INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work); - - shdlc->client_headroom = tx_headroom; - shdlc->client_tailroom = tx_tailroom; - - shdlc->hdev = nfc_hci_allocate_device(&shdlc_ops, init_data, protocols, - tx_headroom + SHDLC_LLC_HEAD_ROOM, - tx_tailroom, - max_link_payload); - if (shdlc->hdev == NULL) - goto err_allocdev; - - nfc_hci_set_clientdata(shdlc->hdev, shdlc); - - r = nfc_hci_register_device(shdlc->hdev); - if (r < 0) - goto err_regdev; - - return shdlc; - -err_regdev: - nfc_hci_free_device(shdlc->hdev); - -err_allocdev: - kfree(shdlc); - - return NULL; -} -EXPORT_SYMBOL(nfc_shdlc_allocate); - -void nfc_shdlc_free(struct nfc_shdlc *shdlc) -{ - pr_debug("\n"); - - nfc_hci_unregister_device(shdlc->hdev); - nfc_hci_free_device(shdlc->hdev); - - cancel_work_sync(&shdlc->sm_work); - - skb_queue_purge(&shdlc->rcv_q); - skb_queue_purge(&shdlc->send_q); - skb_queue_purge(&shdlc->ack_pending_q); - - kfree(shdlc); -} -EXPORT_SYMBOL(nfc_shdlc_free); - -void nfc_shdlc_set_clientdata(struct nfc_shdlc *shdlc, void *clientdata) -{ - pr_debug("\n"); - - shdlc->clientdata = clientdata; -} -EXPORT_SYMBOL(nfc_shdlc_set_clientdata); - -void *nfc_shdlc_get_clientdata(struct nfc_shdlc *shdlc) -{ - return shdlc->clientdata; -} -EXPORT_SYMBOL(nfc_shdlc_get_clientdata); - -struct nfc_hci_dev *nfc_shdlc_get_hci_dev(struct nfc_shdlc *shdlc) -{ - return shdlc->hdev; -} -EXPORT_SYMBOL(nfc_shdlc_get_hci_dev); -- cgit v1.2.1 From f4f20d0650e9cd13f65b5e7f93af126b526af721 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 18 Sep 2012 19:17:33 +0200 Subject: NFC: Remove unneeded LLC symbols export After fixing the LLC Makefile, we no longer need those exports. Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc.c | 14 +------------- net/nfc/hci/llc_nop.c | 2 -- net/nfc/hci/llc_shdlc.c | 2 -- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index bd11b0f7658a..ae1205ded87f 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -18,8 +18,8 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include #include + #include "llc.h" static struct list_head llc_engines; @@ -44,7 +44,6 @@ exit: nfc_llc_exit(); return r; } -EXPORT_SYMBOL(nfc_llc_init); void nfc_llc_exit(void) { @@ -56,7 +55,6 @@ void nfc_llc_exit(void) kfree(llc_engine); } } -EXPORT_SYMBOL(nfc_llc_exit); int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) { @@ -78,7 +76,6 @@ int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) return 0; } -EXPORT_SYMBOL(nfc_llc_register); static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) { @@ -104,7 +101,6 @@ void nfc_llc_unregister(const char *name) kfree(llc_engine->name); kfree(llc_engine); } -EXPORT_SYMBOL(nfc_llc_unregister); struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, @@ -134,14 +130,12 @@ struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, return llc; } -EXPORT_SYMBOL(nfc_llc_allocate); void nfc_llc_free(struct nfc_llc *llc) { llc->ops->deinit(llc); kfree(llc); } -EXPORT_SYMBOL(nfc_llc_free); inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, int *rx_tailroom) @@ -149,34 +143,28 @@ inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, *rx_headroom = llc->rx_headroom; *rx_tailroom = llc->rx_tailroom; } -EXPORT_SYMBOL(nfc_llc_get_rx_head_tail_room); inline int nfc_llc_start(struct nfc_llc *llc) { return llc->ops->start(llc); } -EXPORT_SYMBOL(nfc_llc_start); inline int nfc_llc_stop(struct nfc_llc *llc) { return llc->ops->stop(llc); } -EXPORT_SYMBOL(nfc_llc_stop); inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) { llc->ops->rcv_from_drv(llc, skb); } -EXPORT_SYMBOL(nfc_llc_rcv_from_drv); inline int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) { return llc->ops->xmit_from_hci(llc, skb); } -EXPORT_SYMBOL(nfc_llc_xmit_from_hci); inline void *nfc_llc_get_data(struct nfc_llc *llc) { return llc->data; } -EXPORT_SYMBOL(nfc_llc_get_data); diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index ec627cee12cd..d91dd66a082d 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -19,7 +19,6 @@ */ #include -#include #include "llc.h" @@ -98,4 +97,3 @@ int nfc_llc_nop_register() { return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops); } -EXPORT_SYMBOL(nfc_llc_nop_register); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index fad6cd18d613..3761c7490a61 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -831,4 +830,3 @@ int nfc_llc_shdlc_register() { return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops); } -EXPORT_SYMBOL(nfc_llc_shdlc_register); -- cgit v1.2.1 From a7d0281bbf7e1b0eff2de3c7ed6a5ffce6b62dac Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 18 Sep 2012 19:24:37 +0200 Subject: NFC: Fix LLC registration definitions for ANSI compliance Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc_nop.c | 2 +- net/nfc/hci/llc_shdlc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index d91dd66a082d..87b10291b40f 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -93,7 +93,7 @@ static struct nfc_llc_ops llc_nop_ops = { .xmit_from_hci = llc_nop_xmit_from_hci, }; -int nfc_llc_nop_register() +int nfc_llc_nop_register(void) { return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops); } diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 3761c7490a61..d54bfc51c693 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -826,7 +826,7 @@ static struct nfc_llc_ops llc_shdlc_ops = { .xmit_from_hci = llc_shdlc_xmit_from_hci, }; -int nfc_llc_shdlc_register() +int nfc_llc_shdlc_register(void) { return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops); } -- cgit v1.2.1 From 80faa59847108d48928fac41b4817ef21ca108a9 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 18 Sep 2012 19:25:38 +0200 Subject: NFC: Add HCI module description Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index c1129c22d835..d378d93de62e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -868,3 +868,4 @@ subsys_initcall(nfc_hci_init); module_exit(nfc_hci_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFC HCI Core"); -- cgit v1.2.1 From 9010e39f508ee57dc9a7675073659cb9d019a802 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Thu, 20 Sep 2012 08:59:11 +0200 Subject: NFC: Handle RSET in SHDLC_CONNECTING state As queue_work() does not guarantee immediate execution of sm_work it can happen in crossover RSET usecase that connect timer will constantly change the shdlc state from NEGOTIATING to CONNECTING before shdlc has chance to handle incoming frame. Signed-off-by: Waldemar Rymarkiewicz Acked-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc_shdlc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index d54bfc51c693..a7931c7c57f2 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -414,7 +414,8 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, switch (u_frame_modifier) { case U_FRAME_RSET: - if (shdlc->state == SHDLC_NEGOCIATING) { + if ((shdlc->state == SHDLC_NEGOCIATING) || + (shdlc->state == SHDLC_CONNECTING)) { /* we sent RSET, but chip wants to negociate */ if (skb->len > 0) w = skb->data[0]; -- cgit v1.2.1 From 12bfd1e8906637757a0bc42970f0fc11a9918019 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Fri, 21 Sep 2012 14:02:46 +0200 Subject: NFC: Don't handle consequent RSET frames after UA During processing incoming RSET frame chip, possibly due to its internal timout, can retrnasmit an another RSET which is next queued for processing in shdlc layer. In case when we accept processed RSET skip those remaining on the rcv queue until chip will send it's first S or I frame. This will mean the chip completed connection as well. Signed-off-by: Waldemar Rymarkiewicz Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc_shdlc.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index a7931c7c57f2..3afde1ecd925 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -32,7 +32,8 @@ enum shdlc_state { SHDLC_DISCONNECTED = 0, SHDLC_CONNECTING = 1, SHDLC_NEGOCIATING = 2, - SHDLC_CONNECTED = 3 + SHDLC_HALF_CONNECTED = 3, + SHDLC_CONNECTED = 4 }; struct llc_shdlc { @@ -363,7 +364,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) shdlc->nr = 0; shdlc->dnr = 0; - shdlc->state = SHDLC_CONNECTED; + shdlc->state = SHDLC_HALF_CONNECTED; } else { shdlc->state = SHDLC_DISCONNECTED; } @@ -414,9 +415,13 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, switch (u_frame_modifier) { case U_FRAME_RSET: - if ((shdlc->state == SHDLC_NEGOCIATING) || - (shdlc->state == SHDLC_CONNECTING)) { - /* we sent RSET, but chip wants to negociate */ + switch (shdlc->state) { + case SHDLC_NEGOCIATING: + case SHDLC_CONNECTING: + /* + * We sent RSET, but chip wants to negociate or we + * got RSET before we managed to send out our. + */ if (skb->len > 0) w = skb->data[0]; @@ -431,19 +436,31 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, r = llc_shdlc_connect_send_ua(shdlc); llc_shdlc_connect_complete(shdlc, r); } - } else if (shdlc->state == SHDLC_CONNECTED) { + break; + case SHDLC_HALF_CONNECTED: + /* + * Chip resent RSET due to its timeout - Ignote it + * as we already sent UA. + */ + break; + case SHDLC_CONNECTED: /* * Chip wants to reset link. This is unexpected and * unsupported. */ shdlc->hard_fault = -ECONNRESET; + break; + default: + break; } break; case U_FRAME_UA: if ((shdlc->state == SHDLC_CONNECTING && shdlc->connect_tries > 0) || - (shdlc->state == SHDLC_NEGOCIATING)) + (shdlc->state == SHDLC_NEGOCIATING)) { llc_shdlc_connect_complete(shdlc, 0); + shdlc->state = SHDLC_CONNECTED; + } break; default: break; @@ -470,11 +487,17 @@ static void llc_shdlc_handle_rcv_queue(struct llc_shdlc *shdlc) switch (control & SHDLC_CONTROL_HEAD_MASK) { case SHDLC_CONTROL_HEAD_I: case SHDLC_CONTROL_HEAD_I2: + if (shdlc->state == SHDLC_HALF_CONNECTED) + shdlc->state = SHDLC_CONNECTED; + ns = (control & SHDLC_CONTROL_NS_MASK) >> 3; nr = control & SHDLC_CONTROL_NR_MASK; llc_shdlc_rcv_i_frame(shdlc, skb, ns, nr); break; case SHDLC_CONTROL_HEAD_S: + if (shdlc->state == SHDLC_HALF_CONNECTED) + shdlc->state = SHDLC_CONNECTED; + s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3; nr = control & SHDLC_CONTROL_NR_MASK; llc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr); @@ -633,6 +656,7 @@ static void llc_shdlc_sm_work(struct work_struct *work) break; } break; + case SHDLC_HALF_CONNECTED: case SHDLC_CONNECTED: llc_shdlc_handle_rcv_queue(shdlc); llc_shdlc_handle_send_queue(shdlc); -- cgit v1.2.1 From 4c0ba9ac4bf5f20ada774f5d181d03044e0147e7 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Mon, 24 Sep 2012 08:07:07 +0200 Subject: NFC: Fix typo negociating -> negotiating Signed-off-by: Waldemar Rymarkiewicz Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc_shdlc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 3afde1ecd925..8f69d791dcb3 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -31,7 +31,7 @@ enum shdlc_state { SHDLC_DISCONNECTED = 0, SHDLC_CONNECTING = 1, - SHDLC_NEGOCIATING = 2, + SHDLC_NEGOTIATING = 2, SHDLC_HALF_CONNECTED = 3, SHDLC_CONNECTED = 4 }; @@ -416,7 +416,7 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, switch (u_frame_modifier) { case U_FRAME_RSET: switch (shdlc->state) { - case SHDLC_NEGOCIATING: + case SHDLC_NEGOTIATING: case SHDLC_CONNECTING: /* * We sent RSET, but chip wants to negociate or we @@ -457,7 +457,7 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, case U_FRAME_UA: if ((shdlc->state == SHDLC_CONNECTING && shdlc->connect_tries > 0) || - (shdlc->state == SHDLC_NEGOCIATING)) { + (shdlc->state == SHDLC_NEGOTIATING)) { llc_shdlc_connect_complete(shdlc, 0); shdlc->state = SHDLC_CONNECTED; } @@ -640,10 +640,10 @@ static void llc_shdlc_sm_work(struct work_struct *work) mod_timer(&shdlc->connect_timer, jiffies + msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS)); - shdlc->state = SHDLC_NEGOCIATING; + shdlc->state = SHDLC_NEGOTIATING; } break; - case SHDLC_NEGOCIATING: + case SHDLC_NEGOTIATING: if (timer_pending(&shdlc->connect_timer) == 0) { shdlc->state = SHDLC_CONNECTING; queue_work(system_nrt_wq, &shdlc->sm_work); -- cgit v1.2.1 From 8489c1d9a80e6263fb71de5bc7f39773712cfa7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 08:21:16 +0200 Subject: net: raw: revert unrelated change Commit 5640f7685831 ("net: use a per task frag allocator") accidentally contained an unrelated change to net/ipv4/raw.c, later committed (without the pr_err() debugging bits) in net tree as commit ab43ed8b749 (ipv4: raw: fix icmp_filter()) This patch reverts this glitch, noticed by Stephen Rothwell. Reported-by: Stephen Rothwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a80740ba4248..f2425785d40a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,23 +131,18 @@ found: * 0 - deliver * 1 - block */ -static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) +static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) { - struct icmphdr _hdr; - const struct icmphdr *hdr; - - pr_err("icmp_filter skb_transport_offset %d data-head %ld len %d/%d\n", - skb_transport_offset(skb), skb->data - skb->head, skb->len, skb->data_len); - hdr = skb_header_pointer(skb, skb_transport_offset(skb), - sizeof(_hdr), &_hdr); - pr_err("head %p data %p hdr %p type %d\n", skb->head, skb->data, hdr, hdr ? hdr->type : -1); - if (!hdr) + int type; + + if (!pskb_may_pull(skb, sizeof(struct icmphdr))) return 1; - if (hdr->type < 32) { + type = icmp_hdr(skb)->type; + if (type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1U << hdr->type) & data) != 0; + return ((1 << type) & data) != 0; } /* Do not block unknown ICMP types */ -- cgit v1.2.1 From 64629b9d412544b0ed744405944fd6edf79d7e0d Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Sun, 23 Sep 2012 09:49:54 +0200 Subject: cfg80211: Fix regulatory check for 60GHz band frequencies The current regulatory code on cfg80211 performs a check to see if a regulatory rule belongs to an IEEE band so that if a Country IE is received and no rules are specified for a band (which is allowed by IEEE) those bands are left intact. The current band check assumes a rule is bound to a band if the rule's start or end frequency is less than 2 GHz apart from the center of frequency being inspected. In order to support 60 GHz for 802.11ad we need to increase this to account for the channel spacing of 2160 MHz whereby a channel somewhere in the middle of a regulatory rule may be more than 2 GHz apart from either the beginning or end of the frequency rule. Without a fix for this even though channels 1-3 are allowed world wide on the rule (57240 - 63720 @ 2160), channel 2 at 60480 MHz will end up getting disabled given that it is 3240 MHz from both the frequency rule start and end frequency. Fix this by using 2 GHz separation assumption for the 2.4 and 5 GHz bands but for 60 GHz use a 10 GHz separation before assuming a rule is not part of the band. Since we have no 802.11ad drivers yet merged this change has no impact to existing Linux upstream device drivers. Signed-off-by: Vladimir Kondratiev Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/wireless/reg.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0ba3328dcc9a..844823973daf 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -504,9 +504,11 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * * This lets us know if a specific frequency rule is or is not relevant to * a specific frequency's band. Bands are device specific and artificial - * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is - * safe for now to assume that a frequency rule should not be part of a - * frequency's band if the start freq or end freq are off by more than 2 GHz. + * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), + * however it is safe for now to assume that a frequency rule should not be + * part of a frequency's band if the start freq or end freq are off by more + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". **/ @@ -514,9 +516,16 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 - if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + /* + * From 802.11ad: directional multi-gigabit (DMG): + * Pertaining to operation in a frequency band containing a channel + * with the Channel starting frequency above 45 GHz. + */ + u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? + 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; - if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + if (abs(freq_khz - freq_range->end_freq_khz) <= limit) return true; return false; #undef ONE_GHZ_IN_KHZ -- cgit v1.2.1 From 1b05c4b50edbddbdde715c4a7350629819f6655e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: ipv6: raw: fix icmpv6_filter() icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..4a5f78b50495 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -- cgit v1.2.1 From 842b08bbee448b2069aaebb7f18b40942ad2a1bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Sep 2012 22:09:58 +0000 Subject: ipconfig: fix trivial build error The commit 5e953778a2aab04929a5e7b69f53dc26e39b079e ("ipconfig: add nameserver IPs to kernel-parameter ip=") introduces ic_nameservers_predef() that defined only for BOOTP. However it is used by ip_auto_config_setup() as well. This patch moves it outside of #ifdef BOOTP. Signed-off-by: Andy Shevchenko Cc: Christoph Fritz Cc: David S. Miller Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1c0e7e051044..798358b10717 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -582,6 +582,17 @@ static void __init ic_rarp_send_if(struct ic_device *d) } #endif +/* + * Predefine Nameservers + */ +static inline void __init ic_nameservers_predef(void) +{ + int i; + + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + ic_nameservers[i] = NONE; +} + /* * DHCP/BOOTP support. */ @@ -742,17 +753,6 @@ static void __init ic_bootp_init_ext(u8 *e) } -/* - * Predefine Nameservers - */ -static inline void __init ic_nameservers_predef(void) -{ - int i; - - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) - ic_nameservers[i] = NONE; -} - /* * Initialize the DHCP/BOOTP mechanism. */ -- cgit v1.2.1 From 96af69ea2a83d292238bdba20e4508ee967cf8cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: ipv6: mip6: fix mip6_mh_filter() mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c31d87b..0f9bdc5ee9f3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } -- cgit v1.2.1 From 82e6bfe2fbc4d48852114c4f979137cd5bf1d1a8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 22:26:52 +0000 Subject: netfilter: xt_limit: have r->cost != 0 case work Commit v2.6.19-rc1~1272^2~41 tells us that r->cost != 0 can happen when a running state is saved to userspace and then reinstated from there. Make sure that private xt_limit area is initialized with correct values. Otherwise, random matchings due to use of uninitialized memory. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_limit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8ab309..a4c1e4528cac 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } -- cgit v1.2.1 From 28407630513b1a86133db0ef8b39fabad6c494af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 23:54:15 -0400 Subject: take descriptor handling from sock_alloc_file() to callers Signed-off-by: Al Viro --- net/socket.c | 62 +++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index edc3c4af9085..a14ec19164b6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -346,22 +346,15 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags) +static struct file *sock_alloc_file(struct socket *sock, int flags) { struct qstr name = { .name = "" }; struct path path; struct file *file; - int fd; - - fd = get_unused_fd_flags(flags); - if (unlikely(fd < 0)) - return fd; path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); - if (unlikely(!path.dentry)) { - put_unused_fd(fd); - return -ENOMEM; - } + if (unlikely(!path.dentry)) + return ERR_PTR(-ENOMEM); path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); @@ -373,28 +366,31 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) /* drop dentry, keep inode */ ihold(path.dentry->d_inode); path_put(&path); - put_unused_fd(fd); - return -ENFILE; + return ERR_PTR(-ENFILE); } sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; - - *f = file; - return fd; + return file; } int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags); + int fd = get_unused_fd_flags(flags); + if (unlikely(fd < 0)) + return fd; - if (likely(fd >= 0)) + newfile = sock_alloc_file(sock, flags); + if (likely(!IS_ERR(newfile))) { fd_install(fd, newfile); + return fd; + } - return fd; + put_unused_fd(fd); + return PTR_ERR(newfile); } EXPORT_SYMBOL(sock_map_fd); @@ -1394,17 +1390,32 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags); + fd1 = get_unused_fd_flags(flags); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - - fd2 = sock_alloc_file(sock2, &newfile2, flags); + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; + put_unused_fd(fd1); + goto out_release_both; + } + + newfile1 = sock_alloc_file(sock1, flags); + if (unlikely(IS_ERR(newfile1))) { + err = PTR_ERR(newfile1); + put_unused_fd(fd1); + put_unused_fd(fd2); + goto out_release_both; + } + + newfile2 = sock_alloc_file(sock2, flags); + if (IS_ERR(newfile2)) { + err = PTR_ERR(newfile2); fput(newfile1); put_unused_fd(fd1); + put_unused_fd(fd2); sock_release(sock2); goto out; } @@ -1536,12 +1547,19 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags); + newfd = get_unused_fd_flags(flags); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } + newfile = sock_alloc_file(newsock, flags); + if (unlikely(IS_ERR(newfile))) { + err = PTR_ERR(newfile); + put_unused_fd(newfd); + sock_release(newsock); + goto out_put; + } err = security_socket_accept(sock, newsock); if (err) -- cgit v1.2.1 From 56b31d1c9f1e6a3ad92e7bfe252721e05d92b285 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Aug 2012 00:25:51 -0400 Subject: unexport sock_map_fd(), switch to sock_alloc_file() Both modular callers of sock_map_fd() had been buggy; sctp one leaks descriptor and file if copy_to_user() fails, 9p one shouldn't be exposing file in the descriptor table at all. Switch both to sock_alloc_file(), export it, unexport sock_map_fd() and make it static. Signed-off-by: Al Viro --- net/9p/trans_fd.c | 16 +++++++--------- net/sctp/socket.c | 25 ++++++++++++++++++++----- net/socket.c | 6 +++--- 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6449bae15702..8c4e0b538a8a 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -793,30 +793,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; - int ret, fd; + struct file *file; + int ret; p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket, 0); - if (fd < 0) { + file = sock_alloc_file(csocket, 0); + if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); - return fd; + return PTR_ERR(file); } - get_file(csocket->file); - get_file(csocket->file); - p->wr = p->rd = csocket->file; + get_file(file); + p->wr = p->rd = file; client->trans = p; client->status = Connected; - sys_close(fd); /* still racy */ - p->rd->f_flags |= O_NONBLOCK; p->conn = p9_conn_create(client); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5e259817a7f3..fb5931ca50d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -4276,6 +4277,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval { sctp_peeloff_arg_t peeloff; struct socket *newsock; + struct file *newfile; int retval = 0; if (len < sizeof(sctp_peeloff_arg_t)) @@ -4289,22 +4291,35 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = sock_map_fd(newsock, 0); + retval = get_unused_fd(); if (retval < 0) { sock_release(newsock); goto out; } + newfile = sock_alloc_file(newsock, 0); + if (unlikely(IS_ERR(newfile))) { + put_unused_fd(retval); + sock_release(newsock); + return PTR_ERR(newfile); + } + SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", __func__, sk, newsock->sk, retval); /* Return the fd mapped to the new socket. */ + if (put_user(len, optlen)) { + fput(newfile); + put_unused_fd(retval); + return -EFAULT; + } peeloff.sd = retval; - if (put_user(len, optlen)) + if (copy_to_user(optval, &peeloff, len)) { + fput(newfile); + put_unused_fd(retval); return -EFAULT; - if (copy_to_user(optval, &peeloff, len)) - retval = -EFAULT; - + } + fd_install(retval, newfile); out: return retval; } diff --git a/net/socket.c b/net/socket.c index a14ec19164b6..38a14311f3a6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -346,7 +346,7 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ -static struct file *sock_alloc_file(struct socket *sock, int flags) +struct file *sock_alloc_file(struct socket *sock, int flags) { struct qstr name = { .name = "" }; struct path path; @@ -375,8 +375,9 @@ static struct file *sock_alloc_file(struct socket *sock, int flags) file->private_data = sock; return file; } +EXPORT_SYMBOL(sock_alloc_file); -int sock_map_fd(struct socket *sock, int flags) +static int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; int fd = get_unused_fd_flags(flags); @@ -392,7 +393,6 @@ int sock_map_fd(struct socket *sock, int flags) put_unused_fd(fd); return PTR_ERR(newfile); } -EXPORT_SYMBOL(sock_map_fd); struct socket *sock_from_file(struct file *file, int *err) { -- cgit v1.2.1 From c3c073f808b22dfae15ef8412b6f7b998644139a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:32:06 -0400 Subject: new helper: iterate_fd() iterates through the opened files in given descriptor table, calling a supplied function; we stop once non-zero is returned. Callback gets struct file *, descriptor number and const void * argument passed to iterator. It is called with files->file_lock held, so it is not allowed to block. tty_io, netprio_cgroup and selinux flush_unauthorized_files() converted to its use. Signed-off-by: Al Viro --- net/core/netprio_cgroup.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..5ffd084c6a83 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -272,38 +272,24 @@ out_free_devname: return ret; } +static int update_netprio(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + return 0; +} + void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; + void *v; cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - struct file *file; - struct socket *sock; - int err; - - file = fcheck_files(files, fd); - if (!file) - continue; - - sock = sock_from_file(file, &err); - if (sock) - sock_update_netprioidx(sock->sk, p); - } - spin_unlock(&files->file_lock); + v = (void *)(unsigned long)task_netprioidx(p); + iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } -- cgit v1.2.1 From cb0942b81249798e15c3f04eee2946ef543e8115 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 14:48:26 -0400 Subject: make get_file() return its argument simplifies a bunch of callers... Signed-off-by: Al Viro --- net/compat.c | 3 +-- net/core/scm.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 74ed1d7a84a2..79ae88485001 100644 --- a/net/compat.c +++ b/net/compat.c @@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) { diff --git a/net/core/scm.c b/net/core/scm.c index 040cebeed45b..b0098d259233 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -281,11 +281,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); sock = sock_from_file(fp[i], &err); if (sock) sock_update_netprioidx(sock->sk, current); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) -- cgit v1.2.1 From fe235b58d517d623bf6d40c77afca1b0ee6fc85d Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 25 Sep 2012 12:42:50 +0200 Subject: NFC: Use dynamic initialization for rwlocks If rwlock is dynamically allocated but statically initialized it is missing proper lockdep annotation. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. Pid: 3352, comm: neard Not tainted 3.5.0-999-nfc+ #2 Call Trace: [] __lock_acquire+0x8f6/0x1bf0 [] ? printk+0x4d/0x4f [] lock_acquire+0x9d/0x220 [] ? nfc_llcp_sock_from_sn+0x4e/0x160 [] _raw_read_lock+0x44/0x60 [] ? nfc_llcp_sock_from_sn+0x4e/0x160 [] nfc_llcp_sock_from_sn+0x4e/0x160 [] nfc_llcp_get_sdp_ssap+0xa7/0x1b0 [] llcp_sock_bind+0x173/0x210 [] sys_bind+0xe4/0x100 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Signed-off-by: Szymon Janc Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 90ef4a176819..d649fbf39d58 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -1156,8 +1156,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work); - local->sockets.lock = __RW_LOCK_UNLOCKED(local->sockets.lock); - local->connecting_sockets.lock = __RW_LOCK_UNLOCKED(local->connecting_sockets.lock); + rwlock_init(&local->sockets.lock); + rwlock_init(&local->connecting_sockets.lock); nfc_llcp_build_gb(local); -- cgit v1.2.1 From 4463523bef98ff827a89cf8219db7dfac4350241 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 26 Sep 2012 18:16:44 +0200 Subject: NFC: LLCP raw socket support This adds support for socket of type SOCK_RAW to LLCP. sk_buff are copied and sent to raw sockets with a 2 bytes extra header: The first byte header contains the nfc adapter index. The second one contains flags: - 0x01 - Direction (0=RX, 1=TX) - 0x02-0x80 - Reserved A raw socket has to be explicitly bound to a nfc adapter. This is achieved by specifying the adapter index to be bound to in the dev_idx field of the sockaddr_nfc_llcp struct passed to bind(). Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp/commands.c | 2 ++ net/nfc/llcp/llcp.c | 46 +++++++++++++++++++++++++ net/nfc/llcp/llcp.h | 3 ++ net/nfc/llcp/sock.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 137 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index b982b5b890d7..c45ccd6c094c 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -312,6 +312,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); } diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index d649fbf39d58..fc43747f0c33 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -558,6 +558,46 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) sock->recv_ack_n = (sock->recv_n - 1) % 16; } +void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, + struct sk_buff *skb, u8 direction) +{ + struct hlist_node *node; + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&local->raw_sockets.lock); + + sk_for_each(sk, node, &local->raw_sockets.head) { + if (sk->sk_state != LLCP_BOUND) + continue; + + if (skb_copy == NULL) { + skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, + GFP_ATOMIC); + + if (skb_copy == NULL) + continue; + + data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + + data[0] = local->dev ? local->dev->idx : 0xFF; + data[1] = direction; + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&local->raw_sockets.lock); + + kfree_skb(skb_copy); +} + static void nfc_llcp_tx_work(struct work_struct *work) { struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, @@ -578,6 +618,9 @@ static void nfc_llcp_tx_work(struct work_struct *work) DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + nfc_llcp_send_to_raw_sock(local, skb, + NFC_LLCP_DIRECTION_TX); + ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -1022,6 +1065,8 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + switch (ptype) { case LLCP_PDU_SYMM: pr_debug("SYMM\n"); @@ -1158,6 +1203,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) rwlock_init(&local->sockets.lock); rwlock_init(&local->connecting_sockets.lock); + rwlock_init(&local->raw_sockets.lock); nfc_llcp_build_gb(local); diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index af395c9ceb03..fdb2d24e60bd 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -86,6 +86,7 @@ struct nfc_llcp_local { /* sockets array */ struct llcp_sock_list sockets; struct llcp_sock_list connecting_sockets; + struct llcp_sock_list raw_sockets; }; struct nfc_llcp_sock { @@ -184,6 +185,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local); void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap); int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock); +void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, + struct sk_buff *skb, u8 direction); /* Sock API */ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 6e188d4020ba..40f056debf9a 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -142,6 +142,60 @@ error: return ret; } +static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, + int alen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + struct nfc_llcp_local *local; + struct nfc_dev *dev; + struct sockaddr_nfc_llcp llcp_addr; + int len, ret = 0; + + if (!addr || addr->sa_family != AF_NFC) + return -EINVAL; + + pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); + + memset(&llcp_addr, 0, sizeof(llcp_addr)); + len = min_t(unsigned int, sizeof(llcp_addr), alen); + memcpy(&llcp_addr, addr, len); + + lock_sock(sk); + + if (sk->sk_state != LLCP_CLOSED) { + ret = -EBADFD; + goto error; + } + + dev = nfc_get_device(llcp_addr.dev_idx); + if (dev == NULL) { + ret = -ENODEV; + goto error; + } + + local = nfc_llcp_find_local(dev); + if (local == NULL) { + ret = -ENODEV; + goto put_dev; + } + + llcp_sock->dev = dev; + llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; + + nfc_llcp_sock_link(&local->raw_sockets, sk); + + sk->sk_state = LLCP_BOUND; + +put_dev: + nfc_put_device(dev); + +error: + release_sock(sk); + return ret; +} + static int llcp_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; @@ -418,7 +472,10 @@ static int llcp_sock_release(struct socket *sock) release_sock(sk); - nfc_llcp_sock_unlink(&local->sockets, sk); + if (sock->type == SOCK_RAW) + nfc_llcp_sock_unlink(&local->raw_sockets, sk); + else + nfc_llcp_sock_unlink(&local->sockets, sk); out: sock_orphan(sk); @@ -614,7 +671,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { /* SOCK_STREAM: re-queue skb if it contains unreceived data */ - if (sk->sk_type == SOCK_STREAM) { + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_RAW) { skb_pull(skb, copied); if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); @@ -655,6 +712,26 @@ static const struct proto_ops llcp_sock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops llcp_rawsock_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .bind = llcp_raw_sock_bind, + .connect = sock_no_connect, + .release = llcp_sock_release, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = llcp_sock_getname, + .poll = llcp_sock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = llcp_sock_recvmsg, + .mmap = sock_no_mmap, +}; + static void llcp_sock_destruct(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -732,10 +809,15 @@ static int llcp_sock_create(struct net *net, struct socket *sock, pr_debug("%p\n", sock); - if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM) + if (sock->type != SOCK_STREAM && + sock->type != SOCK_DGRAM && + sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sock->ops = &llcp_sock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &llcp_rawsock_ops; + else + sock->ops = &llcp_sock_ops; sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); if (sk == NULL) -- cgit v1.2.1 From 7d777c3d95a18ae42473c63104d87ad080885094 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 26 Sep 2012 13:39:10 -0400 Subject: NFC: Add dummy nfc_llc_shdlc_register definition This is used when CONFIG_NFC_SHDLC is disabled. Reported-by: Randy Dunlap Signed-off-by: John W. Linville Signed-off-by: Samuel Ortiz --- net/nfc/hci/llc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index c7014fdfc8c9..7be0b7f3ceb6 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -56,6 +56,14 @@ int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); + +#if defined(CONFIG_NFC_SHDLC) int nfc_llc_shdlc_register(void); +#else +static inline int nfc_llc_shdlc_register(void) +{ + return 0; +} +#endif #endif /* __LOCAL_LLC_H_ */ -- cgit v1.2.1 From 3c0cc8aa23f4b53446bbf385d4647eec6992a2cb Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Wed, 26 Sep 2012 14:17:12 +0200 Subject: NFC: Fix sleeping in invalid context when netlink socket is closed netlink_register_notifier requires notify functions to not sleep. nfc_stop_poll locks device mutex and must not be called from notifier. Create workqueue that will handle this for all devices. BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 0, irqs_disabled(): 0, pid: 4497, name: neard 1 lock held by neard/4497: Pid: 4497, comm: neard Not tainted 3.5.0-999-nfc+ #5 Call Trace: [] __might_sleep+0x145/0x200 [] mutex_lock_nested+0x2e/0x50 [] nfc_stop_poll+0x39/0xb0 [] nfc_genl_rcv_nl_event+0x77/0xc0 [] notifier_call_chain+0x5c/0x120 [] __atomic_notifier_call_chain+0x86/0x140 [] ? notifier_call_chain+0x120/0x120 [] ? skb_dequeue+0x67/0x90 [] atomic_notifier_call_chain+0x16/0x20 [] netlink_release+0x24a/0x280 [] sock_release+0x28/0xa0 [] sock_close+0x17/0x30 [] __fput+0xcc/0x250 [] ____fput+0xe/0x10 [] task_work_run+0x69/0x90 [] do_notify_resume+0x81/0xd0 [] int_signal+0x12/0x17 Signed-off-by: Szymon Janc Signed-off-by: Samuel Ortiz --- net/nfc/netlink.c | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..baa6af9500df 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -761,31 +761,63 @@ static struct genl_ops nfc_genl_ops[] = { }, }; -static int nfc_genl_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) + +struct urelease_work { + struct work_struct w; + int pid; +}; + +static void nfc_urelease_event_work(struct work_struct *work) { - struct netlink_notify *n = ptr; + struct urelease_work *w = container_of(work, struct urelease_work, w); struct class_dev_iter iter; struct nfc_dev *dev; - if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) - goto out; + pr_debug("pid %d\n", w->pid); - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + mutex_lock(&nfc_devlist_mutex); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + mutex_lock(&dev->genl_data.genl_data_mutex); + + if (dev->genl_data.poll_req_pid == w->pid) { nfc_stop_poll(dev); dev->genl_data.poll_req_pid = 0; } + + mutex_unlock(&dev->genl_data.genl_data_mutex); + dev = nfc_device_iter_next(&iter); } nfc_device_iter_exit(&iter); + mutex_unlock(&nfc_devlist_mutex); + + kfree(w); +} + +static int nfc_genl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + struct urelease_work *w; + + if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) + goto out; + + pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (w) { + INIT_WORK((struct work_struct *) w, nfc_urelease_event_work); + w->pid = n->pid; + schedule_work((struct work_struct *) w); + } + out: return NOTIFY_DONE; } -- cgit v1.2.1 From 50b78b2a6500d0e97c204c1b6c51df8c17358bbe Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Wed, 26 Sep 2012 14:22:10 +0200 Subject: NFC: Fix sleeping in atomic when releasing socket nfc_llcp_socket_release is calling lock_sock/release_sock while holding write lock for rwlock. Use bh_lock/unlock_sock instead. BUG: sleeping function called from invalid context at net/core/sock.c:2138 in_atomic(): 1, irqs_disabled(): 0, pid: 56, name: kworker/1:1 4 locks held by kworker/1:1/56: Pid: 56, comm: kworker/1:1 Not tainted 3.5.0-999-nfc+ #7 Call Trace: [] __might_sleep+0x145/0x200 [] lock_sock_nested+0x36/0xa0 [] ? _raw_write_lock+0x49/0x50 [] ? nfc_llcp_socket_release+0x30/0x200 [nfc] [] nfc_llcp_socket_release+0x52/0x200 [nfc] [] nfc_llcp_mac_is_down+0x20/0x30 [nfc] [] nfc_dep_link_down+0xaa/0xf0 [nfc] [] nfc_llcp_timeout_work+0x15/0x20 [nfc] [] process_one_work+0x197/0x7c0 [] ? process_one_work+0x136/0x7c0 [] ? __schedule+0x419/0x9c0 [] ? nfc_llcp_build_gb+0x1b0/0x1b0 [nfc] [] worker_thread+0x190/0x4c0 [] ? rescuer_thread+0x2a0/0x2a0 [] kthread+0xae/0xc0 [] ? trace_hardirqs_on+0xd/0x10 [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? flush_kthread_worker+0x150/0x150 [] ? gs_change+0x13/0x13 Signed-off-by: Szymon Janc Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index fc43747f0c33..c12c5ef3d036 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -56,7 +56,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) sk_for_each_safe(sk, node, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); - lock_sock(sk); + bh_lock_sock(sk); if (sk->sk_state == LLCP_CONNECTED) nfc_put_device(llcp_sock->dev); @@ -68,26 +68,26 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue, accept_queue) { accept_sk = &lsk->sk; - lock_sock(accept_sk); + bh_lock_sock(accept_sk); nfc_llcp_accept_unlink(accept_sk); accept_sk->sk_state = LLCP_CLOSED; - release_sock(accept_sk); + bh_unlock_sock(accept_sk); sock_orphan(accept_sk); } if (listen == true) { - release_sock(sk); + bh_unlock_sock(sk); continue; } } sk->sk_state = LLCP_CLOSED; - release_sock(sk); + bh_unlock_sock(sk); sock_orphan(sk); -- cgit v1.2.1 From 7f8436a1269eaaf2d0b1054a325eddf4e14cb80d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 24 Sep 2012 18:29:01 +0000 Subject: l2tp: fix return value check In case of error, the function genlmsg_put() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6f936358d664 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } @@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || -- cgit v1.2.1 From d208328765a0ab425e36b5b828285f3337a85451 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Sep 2012 18:12:23 +0000 Subject: gre: fix handling of key 0 GRE driver incorrectly uses zero as a flag value. Zero is a perfectly valid value for key, and the tunnel should match packets with no key only with tunnels created without key, and vice versa. This is a slightly visible change since previously it might be possible to construct a working tunnel that sent key 0 and received only because of the key wildcard of zero. I.e the sender sent key of zero, but tunnel was defined without key. Note: using gre key 0 requires iproute2 utilities v3.2 or later. The original utility code was broken as well. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f233c1da2077..0d4c3832d490 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -214,11 +214,25 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, return tot; } +/* Does key in tunnel parameters match packet */ +static bool ipgre_key_match(const struct ip_tunnel_parm *p, + __u32 flags, __be32 key) +{ + if (p->i_flags & GRE_KEY) { + if (flags & GRE_KEY) + return key == p->i_key; + else + return false; /* key expected, none present */ + } else + return !(flags & GRE_KEY); +} + /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, __be32 remote, __be32 local, - __be32 key, __be16 gre_proto) + __u32 flags, __be32 key, + __be16 gre_proto) { struct net *net = dev_net(dev); int link = dev->ifindex; @@ -233,10 +247,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { if (local != t->parms.iph.saddr || remote != t->parms.iph.daddr || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -257,10 +273,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { if (remote != t->parms.iph.daddr || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -283,10 +301,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, if ((local != t->parms.iph.saddr && (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -489,6 +509,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; __be16 flags; + __be32 key = 0; flags = p[0]; if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { @@ -505,6 +526,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) if (skb_headlen(skb) < grehlen) return; + if (flags & GRE_KEY) + key = *(((__be32 *)p) + (grehlen / 4) - 1); + switch (type) { default: case ICMP_PARAMETERPROB: @@ -535,9 +559,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info) rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + flags, key, p[1]); + if (t == NULL) goto out; @@ -642,9 +665,10 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); rcu_read_lock(); - if ((tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, key, - gre_proto))) { + tunnel = ipgre_tunnel_lookup(skb->dev, + iph->saddr, iph->daddr, flags, key, + gre_proto); + if (tunnel) { struct pcpu_tstats *tstats; secpath_reset(skb); -- cgit v1.2.1 From 0c5794a66c7c6443759e6098841767958a031187 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Sep 2012 18:12:24 +0000 Subject: gre: remove unnecessary rcu_read_lock/unlock The gre function pointers for receive and error handling are always called (from gre.c) with rcu_read_lock already held. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 21 +++++++-------------- net/ipv6/ip6_gre.c | 14 +++----------- 2 files changed, 10 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0d4c3832d490..1c012cb2cb94 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -557,37 +557,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags, key, p[1]); if (t == NULL) - goto out; + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - goto out; + return; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; + return; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; -out: - rcu_read_unlock(); } static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) @@ -625,7 +622,7 @@ static int ipgre_rcv(struct sk_buff *skb) __be16 gre_proto; if (!pskb_may_pull(skb, 16)) - goto drop_nolock; + goto drop; iph = ip_hdr(skb); h = skb->data; @@ -636,7 +633,7 @@ static int ipgre_rcv(struct sk_buff *skb) - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; + goto drop; if (flags&GRE_CSUM) { switch (skb->ip_summed) { @@ -664,7 +661,6 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - rcu_read_lock(); tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, flags, key, gre_proto); @@ -740,14 +736,11 @@ static int ipgre_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - rcu_read_unlock(); -drop_nolock: kfree_skb(skb); return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 424d11a4e7ff..b987d4db790f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -437,14 +437,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6h = (const struct ipv6hdr *)skb->data; p = (__be16 *)(skb->data + offset); - rcu_read_lock(); - t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); if (t == NULL) - goto out; + return; switch (type) { __u32 teli; @@ -489,8 +487,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, else t->err_count = 1; t->err_time = jiffies; -out: - rcu_read_unlock(); } static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, @@ -528,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb) __be16 gre_proto; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - goto drop_nolock; + goto drop; ipv6h = ipv6_hdr(skb); h = skb->data; @@ -539,7 +535,7 @@ static int ip6gre_rcv(struct sk_buff *skb) - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; + goto drop; if (flags&GRE_CSUM) { switch (skb->ip_summed) { @@ -567,7 +563,6 @@ static int ip6gre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - rcu_read_lock(); tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); @@ -646,14 +641,11 @@ static int ip6gre_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: - rcu_read_unlock(); -drop_nolock: kfree_skb(skb); return 0; } -- cgit v1.2.1 From b0558ef24a792906914fcad277f3befe2420e618 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Sep 2012 18:12:25 +0000 Subject: xfrm: remove extranous rcu_read_lock The handlers for xfrm_tunnel are always invoked with rcu read lock already. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 5 ----- net/ipv4/ipip.c | 9 +-------- net/ipv6/sit.c | 6 ------ 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3511ffba7bd4..978bca4818ae 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - rcu_read_lock(); tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; @@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb) u64_stats_update_end(&tstats->syncp); skb->dev = tunnel->dev; - rcu_read_unlock(); return 1; } - rcu_read_unlock(); return -1; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 99af1f0cc658..618bde867ac1 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -365,8 +365,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) } err = -ENOENT; - - rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -398,7 +396,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); + return err; } @@ -416,13 +414,11 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - rcu_read_lock(); tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -445,11 +441,8 @@ static int ipip_rcv(struct sk_buff *skb) ipip_ecn_decapsulate(iph, skb); netif_rx(skb); - - rcu_read_unlock(); return 0; } - rcu_read_unlock(); return -1; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3bd1bfc01f85..3ed54ffd8d50 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, @@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb) iph = ip_hdr(skb); - rcu_read_lock(); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } /* no tunnel matched, let upstream know, ipsec may handle it */ - rcu_read_unlock(); return 1; out: kfree_skb(skb); -- cgit v1.2.1 From eccc1bb8d4b4cf68d3c9becb083fa94ada7d495c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 25 Sep 2012 11:02:48 +0000 Subject: tunnel: drop packet if ECN present with not-ECT Linux tunnels were written before RFC6040 and therefore never implemented the corner case of ECN getting set in the outer header and the inner header not being ready for it. Section 4.2. Default Tunnel Egress Behaviour. o If the inner ECN field is Not-ECT, the decapsulator MUST NOT propagate any other ECN codepoint onwards. This is because the inner Not-ECT marking is set by transports that rely on dropped packets as an indication of congestion and would not understand or respond to any other ECN codepoint [RFC4774]. Specifically: * If the inner ECN field is Not-ECT and the outer ECN field is CE, the decapsulator MUST drop the packet. * If the inner ECN field is Not-ECT and the outer ECN field is Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the outgoing packet with the ECN field cleared to Not-ECT. This patch moves the ECN decap logic out of the individual tunnels into a common place. It also adds logging to allow detecting broken systems that set ECN bits incorrectly when tunneling (or an intermediate router might be changing the header). Overloads rx_frame_error to keep track of ECN related error. Thanks to Chris Wright who caught this while reviewing the new VXLAN tunnel. This code was tested by injecting faulty logic in other end GRE to send incorrectly encapsulated packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 38 ++++++++++++++++++++++---------------- net/ipv4/ipip.c | 42 +++++++++++++++++++++++++----------------- net/ipv6/ip6_gre.c | 54 ++++++++++++++++++++++++------------------------------ 3 files changed, 71 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1c012cb2cb94..ef0b861ce044 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -120,6 +120,10 @@ Alexey Kuznetsov. */ +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); @@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, tot->rx_crc_errors = dev->stats.rx_crc_errors; tot->rx_fifo_errors = dev->stats.rx_fifo_errors; tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -587,17 +593,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) -{ - if (INET_ECN_is_ce(iph->tos)) { - if (skb->protocol == htons(ETH_P_IP)) { - IP_ECN_set_ce(ip_hdr(skb)); - } else if (skb->protocol == htons(ETH_P_IPV6)) { - IP6_ECN_set_ce(ipv6_hdr(skb)); - } - } -} - static inline u8 ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) { @@ -620,6 +615,7 @@ static int ipgre_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; + int err; if (!pskb_may_pull(skb, 16)) goto drop; @@ -723,17 +719,27 @@ static int ipgre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - ipgre_ecn_decapsulate(iph, skb); - netif_rx(skb); return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 618bde867ac1..e15b45297c09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -120,6 +120,10 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static int ipip_net_id __read_mostly; struct ipip_net { struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; @@ -400,28 +404,18 @@ out: return err; } -static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, - struct sk_buff *skb) -{ - struct iphdr *inner_iph = ip_hdr(skb); - - if (INET_ECN_is_ce(outer_iph->tos)) - IP_ECN_set_ce(inner_iph); -} - static int ipip_rcv(struct sk_buff *skb) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); + int err; tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return 0; - } + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; secpath_reset(skb); @@ -430,21 +424,35 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - ipip_ecn_decapsulate(iph, skb); - netif_rx(skb); return 0; } return -1; + +drop: + kfree_skb(skb); + return 0; } /* diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b987d4db790f..613a16647741 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -56,6 +56,10 @@ #include +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -149,7 +153,9 @@ static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, tot->rx_crc_errors = dev->stats.rx_crc_errors; tot->rx_fifo_errors = dev->stats.rx_fifo_errors; tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -489,28 +495,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->err_time = jiffies; } -static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, struct sk_buff *skb) -{ - __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; - - if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); - - if (INET_ECN_is_ce(dsfield)) - IP_ECN_set_ce(ip_hdr(skb)); -} - -static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, struct sk_buff *skb) -{ - if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); - - if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) - IP6_ECN_set_ce(ipv6_hdr(skb)); -} - static int ip6gre_rcv(struct sk_buff *skb) { const struct ipv6hdr *ipv6h; @@ -522,6 +506,7 @@ static int ip6gre_rcv(struct sk_buff *skb) struct ip6_tnl *tunnel; int offset = 4; __be16 gre_proto; + int err; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) goto drop; @@ -625,20 +610,29 @@ static int ip6gre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + + err = IP6_ECN_decapsulate(ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - if (skb->protocol == htons(ETH_P_IP)) - ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb); - else if (skb->protocol == htons(ETH_P_IPV6)) - ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb); - netif_rx(skb); return 0; -- cgit v1.2.1 From 4b7cc7fc26598914ba202d04d4196efb26b22e69 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 25 Sep 2012 06:07:50 +0000 Subject: nf_defrag_ipv6: fix oops on module unloading fix copy-paste error introduced in linux-next commit "ipv6: add a new namespace for nf_conntrack_reasm" Signed-off-by: Konstantin Khlebnikov Cc: Amerigo Wang Cc: David S. Miller Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1af12fde08df..18bd9bbbd1c6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -106,7 +106,7 @@ static int __net_init nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ipv6.sysctl.frags_hdr = hdr; + net->nf_frag.sysctl.frags_hdr = hdr; return 0; err_reg: -- cgit v1.2.1 From f54ba7798848ce1385a71b36a2c997422c82220a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Sep 2012 18:35:47 -0400 Subject: pkt_sched: Fix warning false positives. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC refuses to recognize that all error control flows do in fact set err to something. Add an explicit initialization to shut it up. net/sched/sch_drr.c: In function ‘drr_enqueue’: net/sched/sch_drr.c:359:11: warning: ‘err’ may be used uninitialized in this function [-Wmaybe-uninitialized] net/sched/sch_qfq.c: In function ‘qfq_enqueue’: net/sched/sch_qfq.c:885:11: warning: ‘err’ may be used uninitialized in this function [-Wmaybe-uninitialized] Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 2 +- net/sched/sch_qfq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 9ce0b4fe23ff..71e50c80315f 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -352,7 +352,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - int err; + int err = 0; cl = drr_classify(skb, sch, &err); if (cl == NULL) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e4723d31fdd5..25566208f12b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -878,7 +878,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - int err; + int err = 0; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { -- cgit v1.2.1 From e2bcabec6ea5ba30dd2097dc1566e9957d14117c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 11:32:13 +0000 Subject: net: remove sk_init() helper It seems sk_init() has no value today and even does strange things : # grep . /proc/sys/net/core/?mem_* /proc/sys/net/core/rmem_default:212992 /proc/sys/net/core/rmem_max:131071 /proc/sys/net/core/wmem_default:212992 /proc/sys/net/core/wmem_max:131071 We can remove it completely. Signed-off-by: Eric Dumazet Reviewed-by: Shan Wei Signed-off-by: David S. Miller --- net/core/sock.c | 13 ------------- net/socket.c | 6 ------ 2 files changed, 19 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 727114cd6f7e..f5a426097236 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1464,19 +1464,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } EXPORT_SYMBOL_GPL(sk_setup_caps); -void __init sk_init(void) -{ - if (totalram_pages <= 4096) { - sysctl_wmem_max = 32767; - sysctl_rmem_max = 32767; - sysctl_wmem_default = 32767; - sysctl_rmem_default = 32767; - } else if (totalram_pages >= 131072) { - sysctl_wmem_max = 131071; - sysctl_rmem_max = 131071; - } -} - /* * Simple resource managers for sockets. */ diff --git a/net/socket.c b/net/socket.c index c641549a13e2..80dc7e84b046 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2600,12 +2600,6 @@ static int __init sock_init(void) if (err) goto out; - /* - * Initialize sock SLAB cache. - */ - - sk_init(); - /* * Initialize skbuff SLAB cache */ -- cgit v1.2.1 From bcc452935da124f26885e969ec5a78a8735d1452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Sep 2012 01:39:20 +0000 Subject: ipv6: gre: remove ip6gre_header_parse() dev_parse_header() callers provide 8 bytes of storage, so it's not possible to store an IPv6 address. Signed-off-by: Eric Dumazet Reported-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 613a16647741..0185679c5f53 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1279,16 +1279,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, return -t->hlen; } -static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr) -{ - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb); - memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr)); - return sizeof(struct in6_addr); -} - static const struct header_ops ip6gre_header_ops = { .create = ip6gre_header, - .parse = ip6gre_header_parse, }; static const struct net_device_ops ip6gre_netdev_ops = { -- cgit v1.2.1 From 5dff747b7038d10f9c174a1245263fd1c36a644d Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 26 Sep 2012 11:59:09 +0000 Subject: tcp: Remove unused parameter from tcp_v4_save_options struct sock *sk is not used inside tcp_v4_save_options. Thus it can be removed. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93406c583f43..385eb79cf6aa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -925,8 +925,7 @@ EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. */ -static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) { const struct ip_options *opt = &(IPCB(skb)->opt); struct ip_options_rcu *dopt = NULL; @@ -1568,7 +1567,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->loc_addr = daddr; ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); + ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; -- cgit v1.2.1 From bc9259a8bae9e814fc1f775a1b3effa13e6ad330 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 27 Sep 2012 04:11:00 +0000 Subject: inetpeer: fix token initialization When jiffies wraps around (for example, 5 minutes after the boot, see INITIAL_JIFFIES) and peer has just been created, now - peer->rate_last can be < XRLIM_BURST_FACTOR * timeout, so token is not set to the maximum value, thus some icmp packets can be unexpectedly dropped. Fix this case by initializing last_rate to 60 seconds in the past. Signed-off-by: Nicolas Dichtel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd34..c7527f6b9ad9 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ -- cgit v1.2.1 From 69b08f62e17439ee3d436faf0b9a7ca6fffb78db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Sep 2012 06:46:57 +0000 Subject: net: use bigger pages in __netdev_alloc_frag We currently use percpu order-0 pages in __netdev_alloc_frag to deliver fragments used by __netdev_alloc_skb() Depending on NIC driver and arch being 32 or 64 bit, it allows a page to be split in several fragments (between 1 and 8), assuming PAGE_SIZE=4096 Switching to bigger pages (32768 bytes for PAGE_SIZE=4096 case) allows : - Better filling of space (the ending hole overhead is less an issue) - Less calls to page allocator or accesses to page->_count - Could allow struct skb_shared_info futures changes without major performance impact. This patch implements a transparent fallback to smaller pages in case of memory pressure. It also uses a standard "struct page_frag" instead of a custom one. Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Benjamin LaHaise Signed-off-by: David S. Miller --- net/core/skbuff.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2ede3cfa8ffa..607a70ff2cc2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { - struct page *page; - unsigned int offset; - unsigned int pagecnt_bias; + struct page_frag frag; + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) +#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) +#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; void *data = NULL; + int order; unsigned long flags; local_irq_save(flags); nc = &__get_cpu_var(netdev_alloc_cache); - if (unlikely(!nc->page)) { + if (unlikely(!nc->frag.page)) { refill: - nc->page = alloc_page(gfp_mask); - if (unlikely(!nc->page)) - goto end; + for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { + gfp_t gfp = gfp_mask; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + nc->frag.page = alloc_pages(gfp, order); + if (likely(nc->frag.page)) + break; + if (--order < 0) + goto end; + } + nc->frag.size = PAGE_SIZE << order; recycle: - atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS); - nc->pagecnt_bias = NETDEV_PAGECNT_BIAS; - nc->offset = 0; + atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); + nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; + nc->frag.offset = 0; } - if (nc->offset + fragsz > PAGE_SIZE) { + if (nc->frag.offset + fragsz > nc->frag.size) { /* avoid unnecessary locked operations if possible */ - if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) || - atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count)) + if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || + atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) goto recycle; goto refill; } - data = page_address(nc->page) + nc->offset; - nc->offset += fragsz; + data = page_address(nc->frag.page) + nc->frag.offset; + nc->frag.offset += fragsz; nc->pagecnt_bias--; end: local_irq_restore(flags); -- cgit v1.2.1 From 188c517a050ec5b123e72cab76ea213721e5bd9d Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 25 Sep 2012 15:17:07 +0000 Subject: ipv6: return errno pointers consistently for fib6_add_1() fib6_add_1() should consistently return errno pointers, rather than a mixture of NULL and errno pointers. Signed-off-by: Lin Ming Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 286acfc21250..24995a93ef8c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; ln->parent = pn; @@ -561,7 +561,7 @@ insert_above: node_free(in); if (ln) node_free(ln); - return NULL; + return ERR_PTR(-ENOMEM); } /* @@ -611,7 +611,7 @@ insert_above: ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; @@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(fn)) { err = PTR_ERR(fn); - fn = NULL; - } - - if (!fn) goto out; + } pn = fn; @@ -820,15 +817,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) allow_create, replace_required); if (IS_ERR(sn)) { - err = PTR_ERR(sn); - sn = NULL; - } - if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node in main tree. */ node_free(sfn); + err = PTR_ERR(sn); goto st_failure; } @@ -843,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(sn)) { err = PTR_ERR(sn); - sn = NULL; - } - if (!sn) goto st_failure; + } } if (!fn->leaf) { -- cgit v1.2.1 From 290e33593d76d1cebf873da50e036559c4820af9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Aug 2012 09:47:49 -0700 Subject: libceph: remove unused monc->have_fsid This is unused; use monc->client->have_fsid. Signed-off-by: Sage Weil --- net/ceph/mon_client.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 900ea0f043fc..e98f6070b5ae 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -769,7 +769,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) monc->monmap->mon_inst[i].name.num = cpu_to_le64(i); } monc->monmap->num_mon = num_mon; - monc->have_fsid = false; return 0; } -- cgit v1.2.1 From 7698f2f5e0d7b7a062213fa970b7c4e121adf38e Mon Sep 17 00:00:00 2001 From: Iulius Curt Date: Thu, 23 Aug 2012 15:14:29 +0300 Subject: libceph: Fix sparse warning Make ceph_monc_do_poolop() static to remove the following sparse warning: * net/ceph/mon_client.c:616:5: warning: symbol 'ceph_monc_do_poolop' was not declared. Should it be static? Also drops the 'ceph_monc_' prefix, now being a private function. Signed-off-by: Iulius Curt Signed-off-by: Sage Weil --- net/ceph/mon_client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index e98f6070b5ae..812eb3b46c1f 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -637,7 +637,7 @@ bad: /* * Do a synchronous pool op. */ -int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, +static int do_poolop(struct ceph_mon_client *monc, u32 op, u32 pool, u64 snapid, char *buf, int len) { @@ -687,7 +687,7 @@ out: int ceph_monc_create_snapid(struct ceph_mon_client *monc, u32 pool, u64 *snapid) { - return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, + return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, pool, 0, (char *)snapid, sizeof(*snapid)); } @@ -696,7 +696,7 @@ EXPORT_SYMBOL(ceph_monc_create_snapid); int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { - return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, + return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, pool, snapid, 0, 0); } -- cgit v1.2.1 From cc4829e5967de577794b25dfcd1a65e509d171ed Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 5 Sep 2012 14:34:32 +0800 Subject: ceph: use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). Signed-off-by: Wei Yongjun Signed-off-by: Sage Weil --- net/ceph/pagelist.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 665cd23020ff..92866bebb65f 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -1,4 +1,3 @@ - #include #include #include @@ -134,8 +133,8 @@ int ceph_pagelist_truncate(struct ceph_pagelist *pl, ceph_pagelist_unmap_tail(pl); while (pl->head.prev != c->page_lru) { page = list_entry(pl->head.prev, struct page, lru); - list_del(&page->lru); /* remove from pagelist */ - list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ + /* move from pagelist to reserve */ + list_move_tail(&page->lru, &pl->free_list); ++pl->num_pages_free; } pl->room = c->room; -- cgit v1.2.1 From f4b549a5ac818722fc13d789584f41f4e00d78b5 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Fri, 28 Sep 2012 20:15:30 +0000 Subject: use skb_end_offset() in skb_try_coalesce() Commit ec47ea824774(skb: Add inline helper for getting the skb end offset from head) introduces this helper function, skb_end_offset(), we should make use of it. Signed-off-by: Weiping Pan Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d607bae075d5..cdc28598f4ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3483,8 +3483,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) return false; - delta = from->truesize - - SKB_TRUESIZE(skb_end_pointer(from) - from->head); + delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); } WARN_ON_ONCE(delta < len); -- cgit v1.2.1 From 64c6d08e6490fb18cea09bb03686c149946bd818 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Sep 2012 00:04:55 +0000 Subject: ipv6: del unreachable route when an addr is deleted on lo When an address is added on loopback (ip -6 a a 2002::1/128 dev lo), two routes are added: - one in the local table: local 2002::1 via :: dev lo proto none metric 0 - one the in main table (for the prefix): unreachable 2002::1 dev lo proto kernel metric 256 error -101 When the address is deleted, the route inserted in the main table remains because we use rt6_lookup(), which returns NULL when dst->error is set, which is the case here! Thus, it is better to use ip6_route_lookup() to avoid this kind of filter. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 719a828fb67f..480e68422efb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -788,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct in6_addr prefix; struct rt6_info *rt; struct net *net = dev_net(ifp->idev->dev); + struct flowi6 fl6 = {}; + ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); + fl6.flowi6_oif = ifp->idev->dev->ifindex; + fl6.daddr = prefix; + rt = (struct rt6_info *)ip6_route_lookup(net, &fl6, + RT6_LOOKUP_F_IFACE); - if (rt && addrconf_is_prefix_route(rt)) { + if (rt != net->ipv6.ip6_null_entry && + addrconf_is_prefix_route(rt)) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; -- cgit v1.2.1 From 861b650101eb0c627d171eb18de81dddb93d395e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2012 02:14:53 +0000 Subject: tcp: gro: add checksuming helpers skb with CHECKSUM_NONE cant currently be handled by GRO, and we notice this deep in GRO stack in tcp[46]_gro_receive() But there are cases where GRO can be a benefit, even with a lack of checksums. This preliminary work is needed to add GRO support to tunnels. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++--- net/ipv6/tcp_ipv6.c | 20 +++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 385eb79cf6aa..75735c9a6a9d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2803,6 +2803,8 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -2811,11 +2813,22 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d6212d6bc8d8..49c890386ce9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -763,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -771,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), + IPPROTO_TCP, 0)); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); -- cgit v1.2.1 From c9e6bc644e557338221e75c242ab12c275a67d1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2012 19:29:05 +0000 Subject: net: add gro_cells infrastructure This adds a new include file (include/net/gro_cells.h), to bring GRO (Generic Receive Offload) capability to tunnels, in a modular way. Because tunnels receive path is lockless, and GRO adds a serialization using a napi_struct, I chose to add an array of up to DEFAULT_MAX_NUM_RSS_QUEUES cells, so that multi queue devices wont be slowed down because of GRO layer. skb_get_rx_queue() is used as selector. In the future, we might add optional fanout capabilities, using rxhash for example. With help from Ben Hutchings who reminded me netif_get_num_default_rss_queues() function. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3e645f3751bf..ba4bb118a756 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2645,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit); =======================================================================*/ int netdev_max_backlog __read_mostly = 1000; +EXPORT_SYMBOL(netdev_max_backlog); + int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ -- cgit v1.2.1 From 60769a5dcd8755715c7143b4571d5c44f01796f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2012 02:48:50 +0000 Subject: ipv4: gre: add GRO capability Add GRO capability to IPv4 GRE tunnels, using the gro_cells infrastructure. Tested using IPv4 and IPv6 TCP traffic inside this tunnel, and checking GRO is building large packets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ef0b861ce044..b1871d993d22 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -740,8 +740,7 @@ static int ipgre_rcv(struct sk_buff *skb) tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - netif_rx(skb); - + gro_cells_receive(&tunnel->gro_cells, skb); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -1319,6 +1318,9 @@ static const struct net_device_ops ipgre_netdev_ops = { static void ipgre_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); free_percpu(dev->tstats); free_netdev(dev); } @@ -1350,6 +1352,7 @@ static int ipgre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; struct iphdr *iph; + int err; tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; @@ -1376,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } -- cgit v1.2.1 From f674e72ff1aad23a99c7c205473cf02c85c2ac33 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 27 Sep 2012 22:21:19 +0000 Subject: net/key/af_key.c: add range checks on ->sadb_x_policy_len Because sizeof() is size_t then if "len" is negative, it counts as a large positive value. The call tree looks like: pfkey_sendmsg() -> pfkey_process() -> pfkey_spdadd() -> parse_ipsecrequests() Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 2ca7d7f6861c..08897a3c7ec7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1923,6 +1923,9 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); + if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) + return -EINVAL; + while (len >= sizeof(struct sadb_x_ipsecrequest)) { if ((err = parse_ipsecrequest(xp, rq)) < 0) return err; -- cgit v1.2.1 From 9fbef059d63cbc3249a3ddfd9a12cb511c8be55e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Oct 2012 05:21:14 +0000 Subject: gre: fix sparse warning Use be16 consistently when looking at flags. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b1871d993d22..7240f8e2dd45 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -222,7 +222,7 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, /* Does key in tunnel parameters match packet */ static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __u32 flags, __be32 key) + __be16 flags, __be32 key) { if (p->i_flags & GRE_KEY) { if (flags & GRE_KEY) @@ -237,7 +237,7 @@ static bool ipgre_key_match(const struct ip_tunnel_parm *p, static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, __be32 remote, __be32 local, - __u32 flags, __be32 key, + __be16 flags, __be32 key, __be16 gre_proto) { struct net *net = dev_net(dev); -- cgit v1.2.1 From d63b77f4c552cc3a20506871046ab0fcbc332609 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 20:59:48 -0700 Subject: libceph: check for invalid mapping If we encounter an invalid (e.g., zeroed) mapping, return an error and avoid a divide by zero. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 32 ++++++++++++++++++++------------ net/ceph/osdmap.c | 18 ++++++++++++++++-- 2 files changed, 36 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 42119c05e82c..f7b56e23988c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -52,7 +52,7 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -void ceph_calc_raw_layout(struct ceph_osd_client *osdc, +int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, u64 snapid, u64 off, u64 *plen, u64 *bno, @@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; u64 orig_len = *plen; u64 objoff, objlen; /* extent in object */ + int r; reqhead->snapid = cpu_to_le64(snapid); /* object extent? */ - ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); + r = ceph_calc_file_object_mapping(layout, off, plen, bno, + &objoff, &objlen); + if (r < 0) + return r; if (*plen < orig_len) dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); @@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", *bno, objoff, objlen, req->r_num_pages); - + return 0; } EXPORT_SYMBOL(ceph_calc_raw_layout); @@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static void calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) +static int calc_layout(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + u64 off, u64 *plen, + struct ceph_osd_request *req, + struct ceph_osd_req_op *op) { u64 bno; + int r; - ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, + plen, &bno, req, op); + if (r < 0) + return r; snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); + + return r; } /* diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 3124b71a8883..5433fb0eb3c6 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -984,7 +984,7 @@ bad: * for now, we write only a single su, until we can * pass a stride back to the caller. */ -void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, +int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *ono, u64 *oxoff, u64 *oxlen) @@ -998,11 +998,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, osize, su); + if (su == 0 || sc == 0) + goto invalid; su_per_object = osize / su; + if (su_per_object == 0) + goto invalid; dout("osize %u / su %u = su_per_object %u\n", osize, su, su_per_object); - BUG_ON((su & ~PAGE_MASK) != 0); + if ((su & ~PAGE_MASK) != 0) + goto invalid; + /* bl = *off / su; */ t = off; do_div(t, su); @@ -1030,6 +1036,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, *plen = *oxlen; dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); + return 0; + +invalid: + dout(" invalid layout\n"); + *ono = 0; + *oxoff = 0; + *oxlen = 0; + return -EINVAL; } EXPORT_SYMBOL(ceph_calc_file_object_mapping); -- cgit v1.2.1 From 6816282dab3a72efe8c0d182c1bc2960d87f4322 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:01:02 -0700 Subject: ceph: propagate layout error on osd request creation If we are creating an osd request and get an invalid layout, return an EINVAL to the caller. We switch up the return to have an error code instead of NULL implying -ENOMEM. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f7b56e23988c..ccbdfbba9e53 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -464,6 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; + int r; ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; @@ -482,10 +483,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, use_mempool, GFP_NOFS, NULL, NULL); if (!req) - return NULL; + return ERR_PTR(-ENOMEM); /* calculate max write size */ - calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(osdc, vino, layout, off, plen, req, ops); + if (r < 0) + return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ /* in case it differs from natural (file) alignment that @@ -1928,8 +1931,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, false, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short read due to an object boundary */ req->r_pages = pages; @@ -1971,8 +1974,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, snapc, do_sync, truncate_seq, truncate_size, mtime, nofail, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short write due to an object boundary */ req->r_pages = pages; -- cgit v1.2.1 From edc7d57327bd08bfd04f41531d49b176369db218 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Oct 2012 12:32:33 +0000 Subject: netlink: add attributes to fdb interface Later changes need to be able to refer to neighbour attributes when doing fdb_add. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 3 ++- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 02861190a3d4..d9576e6de2b8 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -608,7 +608,8 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } /* Add new permanent fdb entry with RTM_NEWNEIGH */ -int br_fdb_add(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, u16 nlh_flags) { struct net_bridge_port *p; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 11a984b87e62..9b278c4ebee1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -364,7 +364,7 @@ extern void br_fdb_update(struct net_bridge *br, extern int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, const unsigned char *addr); -extern int br_fdb_add(struct ndmsg *nlh, +extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 92575370d9f0..76d4c2c3c89b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2090,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { master = dev->master; - err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr, + err = master->netdev_ops->ndo_fdb_add(ndm, tb, + dev, addr, nlh->nlmsg_flags); if (err) goto out; @@ -2100,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr, + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, + dev, addr, nlh->nlmsg_flags); if (!err) { -- cgit v1.2.1 From 193ba924524e6afe192217982b2c2d67e4715d33 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Oct 2012 12:32:34 +0000 Subject: igmp: export symbol ip_mc_leave_group Needed for VXLAN. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3479b98a00a7..736ab70fd179 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1904,6 +1904,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_unlock(); return ret; } +EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex) -- cgit v1.2.1 From 916082b073ebb7f4e064cebce0768e34cacde508 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 2 Oct 2012 16:01:31 -0700 Subject: workqueue: avoid using deprecated functions The network merge brought in a few users of functions that got deprecated by the workqueue cleanups: the 'system_nrt_wq' is now the same as the regular system_wq, since all workqueues are now non- reentrant. Similarly, remove one use of flush_work_sync() - the regular flush_work() has become synchronous, and the "_sync()" version is thus deprecated as being superfluous. Signed-off-by: Linus Torvalds --- net/nfc/core.c | 2 +- net/nfc/hci/core.c | 6 +++--- net/nfc/hci/hcp.c | 2 +- net/nfc/hci/llc_shdlc.c | 16 ++++++++-------- net/nfc/llcp/llcp.c | 10 +++++----- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index c9eacc1f145f..479bee36dc3e 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -715,7 +715,7 @@ static void nfc_check_pres_timeout(unsigned long data) { struct nfc_dev *dev = (struct nfc_dev *)data; - queue_work(system_nrt_wq, &dev->check_pres_work); + schedule_work(&dev->check_pres_work); } struct class nfc_class = { diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d378d93de62e..5fbb6e40793e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -141,7 +141,7 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err, kfree(hdev->cmd_pending_msg); hdev->cmd_pending_msg = NULL; - queue_work(system_nrt_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); } void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result, @@ -326,7 +326,7 @@ static void nfc_hci_cmd_timeout(unsigned long data) { struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; - queue_work(system_nrt_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, @@ -714,7 +714,7 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); } else { skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); - queue_work(system_nrt_wq, &hdev->msg_rx_work); + schedule_work(&hdev->msg_rx_work); } } diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index 208eedd07ee3..bc308a7ca609 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -108,7 +108,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); - queue_work(system_nrt_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); return 0; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 8f69d791dcb3..01cbc72943cd 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -588,7 +588,7 @@ static void llc_shdlc_connect_timeout(unsigned long data) pr_debug("\n"); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } static void llc_shdlc_t1_timeout(unsigned long data) @@ -597,7 +597,7 @@ static void llc_shdlc_t1_timeout(unsigned long data) pr_debug("SoftIRQ: need to send ack\n"); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } static void llc_shdlc_t2_timeout(unsigned long data) @@ -606,7 +606,7 @@ static void llc_shdlc_t2_timeout(unsigned long data) pr_debug("SoftIRQ: need to retransmit\n"); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } static void llc_shdlc_sm_work(struct work_struct *work) @@ -646,7 +646,7 @@ static void llc_shdlc_sm_work(struct work_struct *work) case SHDLC_NEGOTIATING: if (timer_pending(&shdlc->connect_timer) == 0) { shdlc->state = SHDLC_CONNECTING; - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } llc_shdlc_handle_rcv_queue(shdlc); @@ -711,7 +711,7 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); wait_event(connect_wq, shdlc->connect_result != 1); @@ -728,7 +728,7 @@ static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } /* @@ -746,7 +746,7 @@ static void llc_shdlc_recv_frame(struct llc_shdlc *shdlc, struct sk_buff *skb) skb_queue_tail(&shdlc->rcv_q, skb); } - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, @@ -837,7 +837,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) skb_queue_tail(&shdlc->send_q, skb); - queue_work(system_nrt_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); return 0; } diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index c12c5ef3d036..cc10d073c338 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -181,7 +181,7 @@ static void nfc_llcp_symm_timer(unsigned long data) pr_err("SYMM timeout\n"); - queue_work(system_nrt_wq, &local->timeout_work); + schedule_work(&local->timeout_work); } struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) @@ -1101,7 +1101,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) } - queue_work(system_nrt_wq, &local->tx_work); + schedule_work(&local->tx_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; @@ -1120,7 +1120,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(system_nrt_wq, &local->rx_work); + schedule_work(&local->rx_work); return; } @@ -1135,7 +1135,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(system_nrt_wq, &local->rx_work); + schedule_work(&local->rx_work); return 0; } @@ -1170,7 +1170,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, if (rf_mode == NFC_RF_INITIATOR) { pr_debug("Queueing Tx work\n"); - queue_work(system_nrt_wq, &local->tx_work); + schedule_work(&local->tx_work); } else { mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); -- cgit v1.2.1 From 62b54dd91567686a1cb118f76a72d5f4764a86dd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 1 Oct 2012 23:19:14 +0000 Subject: ipv6: don't add link local route when there is no link local address When an address is added on loopback (ip -6 a a 2002::1/128 dev lo), a route to fe80::/64 is added in the main table: unreachable fe80::/64 dev lo proto kernel metric 256 error -101 This route does not match any prefix (no fe80:: address on lo). In fact, addrconf_dev_config() will not add link local address because this function filters interfaces by type. If the link local address is added manually, the route to the link local prefix will be automatically added by addrconf_add_linklocal(). Note also, that this route is not deleted when the address is removed. After looking at the code, it seems that addrconf_add_lroute() is redundant with addrconf_add_linklocal(), because this function will add the link local route when the link local address is configured. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 480e68422efb..d7c56f8a5b4e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1769,14 +1769,6 @@ static void sit_route_add(struct net_device *dev) } #endif -static void addrconf_add_lroute(struct net_device *dev) -{ - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); -} - static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; @@ -1794,8 +1786,6 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) if (!(dev->flags & IFF_LOOPBACK)) addrconf_add_mroute(dev); - /* Add link local route */ - addrconf_add_lroute(dev); return idev; } @@ -2474,10 +2464,9 @@ static void addrconf_sit_config(struct net_device *dev) sit_add_v4_addrs(idev); - if (dev->flags&IFF_POINTOPOINT) { + if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); - addrconf_add_lroute(dev); - } else + else sit_route_add(dev); } #endif -- cgit v1.2.1 From 5316cf9a5197eb80b2800e1acadde287924ca975 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Oct 2012 06:14:17 +0000 Subject: 8021q: fix mac_len recomputation in vlan_untag() skb_reset_mac_len() relies on the value of the skb->network_header pointer, therefore we must wait for such pointer to be recalculated before computing the new mac_len value. Signed-off-by: Antonio Quartulli Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index b258da88f675..add69d0fd99d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -105,7 +105,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) return NULL; memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; - skb_reset_mac_len(skb); return skb; } @@ -139,6 +138,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + return skb; err_free: -- cgit v1.2.1 From f4ef85bbda96324785097356336bc79cdd37db0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Oct 2012 01:25:26 +0000 Subject: ipv4: add a fib_type to fib_info commit d2d68ba9fe8 (ipv4: Cache input routes in fib_info nexthops.) introduced a regression for forwarding. This was hard to reproduce but the symptom was that packets were delivered to local host instead of being forwarded. David suggested to add fib_type to fib_info so that we dont inadvertently share same fib_info for different purposes. With help from Julian Anastasov who provided very helpful hints, reproduced here : Can it be a problem related to fib_info reuse from different routes. For example, when local IP address is created for subnet we have: broadcast 192.168.0.255 dev DEV proto kernel scope link src 192.168.0.1 192.168.0.0/24 dev DEV proto kernel scope link src 192.168.0.1 local 192.168.0.1 dev DEV proto kernel scope host src 192.168.0.1 The "dev DEV proto kernel scope link src 192.168.0.1" is a reused fib_info structure where we put cached routes. The result can be same fib_info for 192.168.0.255 and 192.168.0.0/24. RTN_BROADCAST is cached only for input routes. Incoming broadcast to 192.168.0.255 can be cached and can cause problems for traffic forwarded to 192.168.0.0/24. So, this patch should solve the problem because it separates the broadcast from unicast traffic. And the ip_route_input_slow caching will work for local and broadcast input routes (above routes 1 and 3) just because they differ in scope and use different fib_info. Many thanks to Chris Clayton for his patience and help. Reported-by: Chris Clayton Bisected-by: Chris Clayton Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Julian Anastasov Tested-by: Chris Clayton Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3509065e409a..267753060ffc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -314,6 +314,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && + nfi->fib_type == fi->fib_type && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && @@ -833,6 +834,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; + fi->fib_type = cfg->fc_type; fi->fib_nhs = nhs; change_nexthops(fi) { -- cgit v1.2.1 From 575659936f9d392b93b03ce97a58dbd4fce18abd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Oct 2012 05:43:21 +0000 Subject: sctp: fix a typo in prototype of __sctp_rcv_lookup() Just to avoid confusion when people only reads this prototype. Signed-off-by: Nicolas Dichtel Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 25dfe7380479..8bd3c279427e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -68,8 +68,8 @@ static int sctp_rcv_ootb(struct sk_buff *); static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, - const union sctp_addr *laddr, const union sctp_addr *paddr, + const union sctp_addr *laddr, struct sctp_transport **transportp); static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, const union sctp_addr *laddr); -- cgit v1.2.1 From edfee0339e681a784ebacec7e8c2dc97dc6d2839 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Oct 2012 05:43:22 +0000 Subject: sctp: check src addr when processing SACK to update transport state Suppose we have an SCTP connection with two paths. After connection is established, path1 is not available, thus this path is marked as inactive. Then traffic goes through path2, but for some reasons packets are delayed (after rto.max). Because packets are delayed, the retransmit mechanism will switch again to path1. At this time, we receive a delayed SACK from path2. When we update the state of the path in sctp_check_transmitted(), we do not take into account the source address of the SACK, hence we update the wrong path. Signed-off-by: Nicolas Dichtel Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 15 ++++++++++----- net/sctp/sm_sideeffect.c | 4 ++-- net/sctp/sm_statefuns.c | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d16632e1503a..1b4a7f8ec3fd 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -63,6 +63,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, + union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn); @@ -1139,9 +1140,10 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, * Process the SACK against the outqueue. Mostly, this just frees * things off the transmitted queue. */ -int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) +int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) { struct sctp_association *asoc = q->asoc; + struct sctp_sackhdr *sack = chunk->subh.sack_hdr; struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; @@ -1210,7 +1212,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1219,7 +1221,8 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, &highest_new_tsn); + transport, &chunk->source, sack, + &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1326,6 +1329,7 @@ int sctp_outq_is_empty(const struct sctp_outq *q) static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, + union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn_in_sack) { @@ -1633,8 +1637,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Mark the destination transport address as * active if it is not so marked. */ - if ((transport->state == SCTP_INACTIVE) || - (transport->state == SCTP_UNCONFIRMED)) { + if ((transport->state == SCTP_INACTIVE || + transport->state == SCTP_UNCONFIRMED) && + sctp_cmp_addr_exact(&transport->ipaddr, saddr)) { sctp_assoc_control_transport( transport->asoc, transport, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bcfebb91559d..57f7de839b03 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -752,11 +752,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Helper function to process the process SACK command. */ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, - struct sctp_sackhdr *sackh) + struct sctp_chunk *chunk) { int err = 0; - if (sctp_outq_sack(&asoc->outqueue, sackh)) { + if (sctp_outq_sack(&asoc->outqueue, chunk)) { struct net *net = sock_net(asoc->base.sk); /* There are no more TSNs awaiting SACK. */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 094813b6c3c3..b6adef8a1e93 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3179,7 +3179,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* Return this SACK for further processing. */ - sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh)); + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_CHUNK(chunk)); /* Note: We do the rest of the work on the PROCESS_SACK * sideeffect. -- cgit v1.2.1 From 096895818cbf3382dc318f369e642fb7460ccb26 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 4 Oct 2012 03:36:13 +0000 Subject: silence some noisy printks in irda Fuzzing causes these printks to spew constantly. Changing them to DEBUG statements is consistent with other usage in the file, and makes them disappear when CONFIG_IRDA_DEBUG is disabled. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/irda/af_irda.c | 2 +- net/irda/irttp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index bb738c9f9146..b833677d83d6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -468,7 +468,7 @@ static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name) notify_t notify; if (self->tsap) { - IRDA_WARNING("%s: busy!\n", __func__); + IRDA_DEBUG(0, "%s: busy!\n", __func__); return -EBUSY; } diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 5c93f2952b08..1002e3396f72 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -440,7 +440,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) */ lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0); if (lsap == NULL) { - IRDA_WARNING("%s: unable to allocate LSAP!!\n", __func__); + IRDA_DEBUG(0, "%s: unable to allocate LSAP!!\n", __func__); return NULL; } -- cgit v1.2.1 From e57edf6b6dba975eceede20b4b13699d4e88cd78 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 4 Oct 2012 05:00:43 +0000 Subject: tipc: prevent dropped connections due to rcvbuf overflow When large buffers are sent over connected TIPC sockets, it is likely that the sk_backlog will be filled up on the receiver side, but the TIPC flow control mechanism is happily unaware of this since that is based on message count. The sender will receive a TIPC_ERR_OVERLOAD message when this occurs and drop it's side of the connection, leaving it stale on the receiver end. By increasing the sk_rcvbuf to a 'worst case' value, we avoid the overload caused by a full backlog queue and the flow control will work properly. This worst case value is the max TIPC message size times the flow control window, multiplied by two because a sender will transmit up to double the window size before a port is marked congested. We multiply this by 2 to account for the sk_buff and other overheads. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 09dc5b97e079..fd5f042dbff4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -220,6 +220,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; tipc_sk(sk)->p = tp_ptr; tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; -- cgit v1.2.1 From 32418cfe495c95013be2e805c087db89dcefac6d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 4 Oct 2012 09:51:11 +0000 Subject: Remove noisy printks from llcp_sock_connect Validation of userspace input shouldn't trigger dmesg spamming. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/nfc/llcp/sock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 40f056debf9a..63e4cdc92376 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -497,15 +497,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) { - pr_err("Invalid socket\n"); + addr->sa_family != AF_NFC) return -EINVAL; - } - if (addr->service_name_len == 0 && addr->dsap == 0) { - pr_err("Missing service name or dsap\n"); + if (addr->service_name_len == 0 && addr->dsap == 0) return -EINVAL; - } pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx, addr->target_idx, addr->nfc_protocol); -- cgit v1.2.1 From 6825a26c2dc21eb4f8df9c06d3786ddec97cf53b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 19 Sep 2012 19:25:34 +0000 Subject: ipv6: release reference of ip6_null_entry's dst entry in __ip6_del_rt as we hold dst_entry before we call __ip6_del_rt, so we should alse call dst_release not only return -ENOENT when the rt6_info is ip6_null_entry. and we already hold the dst entry, so I think it's safe to call dst_release out of the write-read lock. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1ddbc6ddac5..7c7e963260e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1593,17 +1593,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry) - return -ENOENT; + if (rt == net->ipv6.ip6_null_entry) { + err = -ENOENT; + goto out; + } table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, info); - dst_release(&rt->dst); - write_unlock_bh(&table->tb6_lock); +out: + dst_release(&rt->dst); return err; } -- cgit v1.2.1 From 6299b669b1340b9f7de2bc2bd565921a1494e7f7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Oct 2012 17:12:08 -0700 Subject: sections: fix section conflicts in net/can Signed-off-by: Andi Kleen Cc: Oliver Hartkopp Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/can/af_can.c | 2 +- net/can/bcm.c | 2 +- net/can/gw.c | 2 +- net/can/raw.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 821022a7214f..ddac1ee2ed20 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -63,7 +63,7 @@ #include "af_can.h" -static __initdata const char banner[] = KERN_INFO +static __initconst const char banner[] = KERN_INFO "can: controller area network core (" CAN_VERSION_STRING ")\n"; MODULE_DESCRIPTION("Controller Area Network PF_CAN core"); diff --git a/net/can/bcm.c b/net/can/bcm.c index 151b7730c12c..6f747582718e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -77,7 +77,7 @@ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) #define CAN_BCM_VERSION CAN_VERSION -static __initdata const char banner[] = KERN_INFO +static __initconst const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n"; MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); diff --git a/net/can/gw.c b/net/can/gw.c index 127879c55fb6..1f5c9785a262 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -58,7 +58,7 @@ #include #define CAN_GW_VERSION "20101209" -static __initdata const char banner[] = +static __initconst const char banner[] = KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; MODULE_DESCRIPTION("PF_CAN netlink gateway"); diff --git a/net/can/raw.c b/net/can/raw.c index 3e9c89356a93..5b0e3e330d97 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -55,7 +55,7 @@ #include #define CAN_RAW_VERSION CAN_VERSION -static __initdata const char banner[] = +static __initconst const char banner[] = KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n"; MODULE_DESCRIPTION("PF_CAN raw protocol"); -- cgit v1.2.1 From 04a6f82cf01aeef9fb058b2fca0ef1fe0a09c2fa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Oct 2012 17:12:11 -0700 Subject: sections: fix section conflicts in net Signed-off-by: Andi Kleen Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/addrlabel.c | 2 +- net/ipv6/fib6_rules.c | 2 +- net/ipv6/ip6mr.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index e65f2c856e06..faf7cc3483fe 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -220,7 +220,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { +static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = { .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 274309d3aded..26aa65d1fce4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -262,7 +262,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net); } -static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { +static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1daa95c2a0ba..6168c4dc58b1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -221,7 +221,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { +static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 4be23da32b89..ff76eecfd622 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -79,7 +79,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL -static const __net_initdata struct ip6addrlbl_init_table +static const __net_initconst struct ip6addrlbl_init_table { const struct in6_addr *prefix; int prefixlen; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 0ff1cfd55bc4..d9fb9110f607 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { +static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 08ea3f0b6e55..f7c7c6319720 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -205,7 +205,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { +static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { .family = RTNL_FAMILY_IP6MR, .rule_size = sizeof(struct ip6mr_rule), .addr_size = sizeof(struct in6_addr), -- cgit v1.2.1 From 4c199a93a2d36b277a9fd209a0f2793f8460a215 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:30:32 -0700 Subject: rbtree: empty nodes have no color Empty nodes have no color. We can make use of this property to simplify the code emitted by the RB_EMPTY_NODE and RB_CLEAR_NODE macros. Also, we can get rid of the rb_init_node function which had been introduced by commit 88d19cf37952 ("timers: Add rb_init_node() to allow for stack allocated rb nodes") to avoid some issue with the empty node's color not being initialized. I'm not sure what the RB_EMPTY_NODE checks in rb_prev() / rb_next() are doing there, though. axboe introduced them in commit 10fd48f2376d ("rbtree: fixed reversed RB_EMPTY_NODE and rb_next/prev"). The way I see it, the 'empty node' abstraction is only used by rbtree users to flag nodes that they haven't inserted in any rbtree, so asking the predecessor or successor of such nodes doesn't make any sense. One final rb_init_node() caller was recently added in sysctl code to implement faster sysctl name lookups. This code doesn't make use of RB_EMPTY_NODE at all, and from what I could see it only called rb_init_node() under the mistaken assumption that such initialization was required before node insertion. [sfr@canb.auug.org.au: fix net/ceph/osd_client.c build] Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Acked-by: David Woodhouse Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Jens Axboe Cc: "Eric W. Biederman" Cc: John Stultz Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ceph/osd_client.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccbdfbba9e53..c1d756cc7448 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,7 +221,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); - rb_init_node(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); -- cgit v1.2.1