summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/fc.c1
-rw-r--r--net/802/fddi.c1
-rw-r--r--net/802/sysctl_net_802.c1
-rw-r--r--net/802/tr.c13
-rw-r--r--net/8021q/vlan.c22
-rw-r--r--net/8021q/vlanproc.c1
-rw-r--r--net/Kconfig9
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/aarp.c1
-rw-r--r--net/appletalk/atalk_proc.c1
-rw-r--r--net/appletalk/ddp.c86
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/Makefile2
-rw-r--r--net/atm/atm_sysfs.c175
-rw-r--r--net/atm/br2684.c4
-rw-r--r--net/atm/clip.c22
-rw-r--r--net/atm/common.c8
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/ioctl.c1
-rw-r--r--net/atm/ipcommon.c17
-rw-r--r--net/atm/lec.c3163
-rw-r--r--net/atm/lec.h171
-rw-r--r--net/atm/lec_arpc.h146
-rw-r--r--net/atm/mpc.c31
-rw-r--r--net/atm/mpc.h8
-rw-r--r--net/atm/mpoa_caches.c12
-rw-r--r--net/atm/mpoa_proc.c1
-rw-r--r--net/atm/pppoatm.c4
-rw-r--r--net/atm/proc.c3
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/atm/resources.c26
-rw-r--r--net/atm/resources.h3
-rw-r--r--net/ax25/af_ax25.c18
-rw-r--r--net/ax25/ax25_dev.c5
-rw-r--r--net/ax25/ax25_ds_subr.c8
-rw-r--r--net/ax25/ax25_ds_timer.c4
-rw-r--r--net/ax25/ax25_iface.c19
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/ax25/ax25_ip.c24
-rw-r--r--net/ax25/ax25_out.c1
-rw-r--r--net/ax25/ax25_route.c49
-rw-r--r--net/ax25/ax25_timer.c1
-rw-r--r--net/ax25/sysctl_net_ax25.c5
-rw-r--r--net/bluetooth/af_bluetooth.c23
-rw-r--r--net/bluetooth/bnep/core.c27
-rw-r--r--net/bluetooth/bnep/netdev.c1
-rw-r--r--net/bluetooth/bnep/sock.c1
-rw-r--r--net/bluetooth/cmtp/capi.c5
-rw-r--r--net/bluetooth/cmtp/core.c4
-rw-r--r--net/bluetooth/cmtp/sock.c1
-rw-r--r--net/bluetooth/hci_conn.c140
-rw-r--r--net/bluetooth/hci_core.c49
-rw-r--r--net/bluetooth/hci_event.c233
-rw-r--r--net/bluetooth/hci_sock.c1
-rw-r--r--net/bluetooth/hci_sysfs.c325
-rw-r--r--net/bluetooth/hidp/Kconfig3
-rw-r--r--net/bluetooth/hidp/core.c27
-rw-r--r--net/bluetooth/hidp/sock.c1
-rw-r--r--net/bluetooth/l2cap.c376
-rw-r--r--net/bluetooth/lib.c1
-rw-r--r--net/bluetooth/rfcomm/core.c58
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/rfcomm/tty.c36
-rw-r--r--net/bluetooth/sco.c17
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_forward.c13
-rw-r--r--net/bridge/br_if.c10
-rw-r--r--net/bridge/br_ioctl.c7
-rw-r--r--net/bridge/br_netfilter.c57
-rw-r--r--net/bridge/br_netlink.c38
-rw-r--r--net/bridge/br_stp_bpdu.c2
-rw-r--r--net/bridge/netfilter/ebt_arpreply.c2
-rw-r--r--net/bridge/netfilter/ebt_ulog.c4
-rw-r--r--net/bridge/netfilter/ebtables.c97
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/datagram.c4
-rw-r--r--net/core/dev.c149
-rw-r--r--net/core/dev_mcast.c3
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/ethtool.c48
-rw-r--r--net/core/fib_rules.c421
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow.c13
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/neighbour.c616
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/netevent.c69
-rw-r--r--net/core/netpoll.c38
-rw-r--r--net/core/pktgen.c329
-rw-r--r--net/core/rtnetlink.c586
-rw-r--r--net/core/skbuff.c161
-rw-r--r--net/core/sock.c157
-rw-r--r--net/core/stream.c16
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/user_dma.c1
-rw-r--r--net/core/utils.c222
-rw-r--r--net/core/wireless.c79
-rw-r--r--net/dccp/Kconfig16
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/ackvec.c20
-rw-r--r--net/dccp/ackvec.h5
-rw-r--r--net/dccp/ccids/Kconfig8
-rw-r--r--net/dccp/ccids/ccid2.c207
-rw-r--r--net/dccp/ccids/ccid2.h9
-rw-r--r--net/dccp/ccids/ccid3.c156
-rw-r--r--net/dccp/ccids/ccid3.h10
-rw-r--r--net/dccp/ccids/lib/loss_interval.c37
-rw-r--r--net/dccp/ccids/lib/loss_interval.h10
-rw-r--r--net/dccp/ccids/lib/packet_history.c169
-rw-r--r--net/dccp/ccids/lib/packet_history.h18
-rw-r--r--net/dccp/ccids/lib/tfrc.h2
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c3
-rw-r--r--net/dccp/dccp.h13
-rw-r--r--net/dccp/diag.c1
-rw-r--r--net/dccp/feat.c1
-rw-r--r--net/dccp/feat.h7
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/ipv4.c23
-rw-r--r--net/dccp/ipv6.c20
-rw-r--r--net/dccp/ipv6.h1
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dccp/options.c5
-rw-r--r--net/dccp/output.c91
-rw-r--r--net/dccp/probe.c198
-rw-r--r--net/dccp/proto.c32
-rw-r--r--net/dccp/sysctl.c9
-rw-r--r--net/dccp/timer.c1
-rw-r--r--net/decnet/Kconfig1
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_dev.c39
-rw-r--r--net/decnet/dn_fib.c80
-rw-r--r--net/decnet/dn_neigh.c4
-rw-r--r--net/decnet/dn_nsp_in.c3
-rw-r--r--net/decnet/dn_route.c29
-rw-r--r--net/decnet/dn_rules.c512
-rw-r--r--net/decnet/dn_table.c175
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/decnet/sysctl_net_decnet.c1
-rw-r--r--net/econet/af_econet.c4
-rw-r--r--net/ethernet/eth.c199
-rw-r--r--net/ieee80211/Kconfig1
-rw-r--r--net/ieee80211/ieee80211_crypt.c3
-rw-r--r--net/ieee80211/ieee80211_crypt_ccmp.c59
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c128
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c47
-rw-r--r--net/ieee80211/ieee80211_geo.c1
-rw-r--r--net/ieee80211/ieee80211_module.c1
-rw-r--r--net/ieee80211/ieee80211_rx.c61
-rw-r--r--net/ieee80211/ieee80211_tx.c25
-rw-r--r--net/ieee80211/ieee80211_wx.c7
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_assoc.c52
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_auth.c32
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_io.c20
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_module.c90
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_priv.h8
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_wx.c40
-rw-r--r--net/ipv4/Kconfig63
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c75
-rw-r--r--net/ipv4/ah4.c45
-rw-r--r--net/ipv4/arp.c44
-rw-r--r--net/ipv4/cipso_ipv4.c1474
-rw-r--r--net/ipv4/datagram.c3
-rw-r--r--net/ipv4/devinet.c275
-rw-r--r--net/ipv4/esp4.c105
-rw-r--r--net/ipv4/fib_frontend.c492
-rw-r--r--net/ipv4/fib_hash.c155
-rw-r--r--net/ipv4/fib_lookup.h17
-rw-r--r--net/ipv4/fib_rules.c622
-rw-r--r--net/ipv4/fib_semantics.c549
-rw-r--r--net/ipv4/fib_trie.c133
-rw-r--r--net/ipv4/icmp.c27
-rw-r--r--net/ipv4/igmp.c125
-rw-r--r--net/ipv4/inet_connection_sock.c16
-rw-r--r--net/ipv4/inet_diag.c16
-rw-r--r--net/ipv4/inet_hashtables.c42
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/inetpeer.c11
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c28
-rw-r--r--net/ipv4/ip_gre.c29
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_options.c47
-rw-r--r--net/ipv4/ip_output.c52
-rw-r--r--net/ipv4/ip_sockglue.c14
-rw-r--r--net/ipv4/ipcomp.c42
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c24
-rw-r--r--net/ipv4/ipmr.c38
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c14
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c36
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c42
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c18
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c18
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/multipath_drr.c1
-rw-r--r--net/ipv4/multipath_random.c1
-rw-r--r--net/ipv4/multipath_rr.c1
-rw-r--r--net/ipv4/multipath_wrandom.c15
-rw-r--r--net/ipv4/netfilter.c6
-rw-r--r--net/ipv4/netfilter/Kconfig24
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c54
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c4
-rw-r--r--net/ipv4/netfilter/arptable_filter.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c228
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c87
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c521
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c13
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c175
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c53
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c18
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c38
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c19
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c8
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c70
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c10
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c99
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_h323.c16
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c191
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c28
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c15
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c23
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c20
-rw-r--r--net/ipv4/netfilter/ip_nat_sip.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c7
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c16
-rw-r--r--net/ipv4/netfilter/ip_queue.c30
-rw-r--r--net/ipv4/netfilter/ip_tables.c224
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c25
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c96
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c48
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c7
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c11
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c10
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c140
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c26
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c12
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c9
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c2
-rw-r--r--net/ipv4/netfilter/ipt_ah.c1
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c54
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c3
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c63
-rw-r--r--net/ipv4/netfilter/ipt_owner.c1
-rw-r--r--net/ipv4/netfilter/ipt_recent.c28
-rw-r--r--net/ipv4/netfilter/iptable_filter.c4
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c7
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/protocol.c1
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c288
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c42
-rw-r--r--net/ipv4/tcp.c73
-rw-r--r--net/ipv4/tcp_bic.c3
-rw-r--r--net/ipv4/tcp_compound.c448
-rw-r--r--net/ipv4/tcp_cong.c5
-rw-r--r--net/ipv4/tcp_cubic.c3
-rw-r--r--net/ipv4/tcp_diag.c6
-rw-r--r--net/ipv4/tcp_highspeed.c16
-rw-r--r--net/ipv4/tcp_htcp.c3
-rw-r--r--net/ipv4/tcp_hybla.c3
-rw-r--r--net/ipv4/tcp_input.c89
-rw-r--r--net/ipv4/tcp_ipv4.c64
-rw-r--r--net/ipv4/tcp_lp.c36
-rw-r--r--net/ipv4/tcp_minisocks.c13
-rw-r--r--net/ipv4/tcp_output.c66
-rw-r--r--net/ipv4/tcp_probe.c5
-rw-r--r--net/ipv4/tcp_scalable.c1
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/tcp_vegas.c3
-rw-r--r--net/ipv4/tcp_veno.c3
-rw-r--r--net/ipv4/tcp_westwood.c3
-rw-r--r--net/ipv4/udp.c156
-rw-r--r--net/ipv4/xfrm4_input.c6
-rw-r--r--net/ipv4/xfrm4_mode_transport.c4
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c4
-rw-r--r--net/ipv4/xfrm4_output.c12
-rw-r--r--net/ipv4/xfrm4_policy.c33
-rw-r--r--net/ipv4/xfrm4_state.c88
-rw-r--r--net/ipv4/xfrm4_tunnel.c2
-rw-r--r--net/ipv6/Kconfig45
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c723
-rw-r--r--net/ipv6/af_inet6.c26
-rw-r--r--net/ipv6/ah6.c125
-rw-r--r--net/ipv6/anycast.c9
-rw-r--r--net/ipv6/datagram.c22
-rw-r--r--net/ipv6/esp6.c107
-rw-r--r--net/ipv6/exthdrs.c266
-rw-r--r--net/ipv6/fib6_rules.c305
-rw-r--r--net/ipv6/icmp.c48
-rw-r--r--net/ipv6/inet6_connection_sock.c6
-rw-r--r--net/ipv6/inet6_hashtables.c9
-rw-r--r--net/ipv6/ip6_fib.c464
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_input.c11
-rw-r--r--net/ipv6/ip6_output.c230
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ipcomp6.c40
-rw-r--r--net/ipv6/ipv6_sockglue.c122
-rw-r--r--net/ipv6/ipv6_syms.c4
-rw-r--r--net/ipv6/mcast.c13
-rw-r--r--net/ipv6/mip6.c519
-rw-r--r--net/ipv6/ndisc.c54
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/ip6_queue.c18
-rw-r--r--net/ipv6/netfilter/ip6_tables.c65
-rw-r--r--net/ipv6/netfilter/ip6t_HL.c9
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c4
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c10
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c1
-rw-r--r--net/ipv6/netfilter/ip6t_dst.c220
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c1
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c49
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c1
-rw-r--r--net/ipv6/netfilter/ip6t_owner.c1
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c1
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c12
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c13
-rw-r--r--net/ipv6/proc.c1
-rw-r--r--net/ipv6/raw.c52
-rw-r--r--net/ipv6/reassembly.c15
-rw-r--r--net/ipv6/route.c1071
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c1
-rw-r--r--net/ipv6/tcp_ipv6.c55
-rw-r--r--net/ipv6/udp.c107
-rw-r--r--net/ipv6/xfrm6_input.c114
-rw-r--r--net/ipv6/xfrm6_mode_ro.c93
-rw-r--r--net/ipv6/xfrm6_mode_transport.c5
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c3
-rw-r--r--net/ipv6/xfrm6_output.c22
-rw-r--r--net/ipv6/xfrm6_policy.c88
-rw-r--r--net/ipv6/xfrm6_state.c193
-rw-r--r--net/ipv6/xfrm6_tunnel.c145
-rw-r--r--net/ipx/af_ipx.c11
-rw-r--r--net/ipx/ipx_proc.c1
-rw-r--r--net/ipx/ipx_route.c1
-rw-r--r--net/ipx/sysctl_net_ipx.c1
-rw-r--r--net/irda/af_irda.c13
-rw-r--r--net/irda/ircomm/ircomm_core.c5
-rw-r--r--net/irda/ircomm/ircomm_lmp.c4
-rw-r--r--net/irda/ircomm/ircomm_param.c2
-rw-r--r--net/irda/ircomm/ircomm_tty.c10
-rw-r--r--net/irda/irda_device.c5
-rw-r--r--net/irda/iriap.c15
-rw-r--r--net/irda/iriap_event.c2
-rw-r--r--net/irda/irias_object.c24
-rw-r--r--net/irda/irlan/irlan_client.c3
-rw-r--r--net/irda/irlan/irlan_common.c47
-rw-r--r--net/irda/irlan/irlan_eth.c1
-rw-r--r--net/irda/irlan/irlan_provider.c12
-rw-r--r--net/irda/irlap.c9
-rw-r--r--net/irda/irlap_event.c1
-rw-r--r--net/irda/irlap_frame.c62
-rw-r--r--net/irda/irlmp.c12
-rw-r--r--net/irda/irlmp_event.c1
-rw-r--r--net/irda/irlmp_frame.c1
-rw-r--r--net/irda/irmod.c1
-rw-r--r--net/irda/irnet/irnet.h2
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/irda/irsysctl.c1
-rw-r--r--net/irda/irttp.c27
-rw-r--r--net/irda/qos.c1
-rw-r--r--net/irda/timer.c1
-rw-r--r--net/key/af_key.c70
-rw-r--r--net/lapb/lapb_iface.c16
-rw-r--r--net/llc/af_llc.c21
-rw-r--r--net/llc/llc_core.c3
-rw-r--r--net/llc/llc_if.c1
-rw-r--r--net/llc/llc_proc.c1
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/llc/llc_station.c1
-rw-r--r--net/llc/sysctl_net_llc.c1
-rw-r--r--net/netfilter/Kconfig32
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/core.c25
-rw-r--r--net/netfilter/nf_conntrack_core.c212
-rw-r--r--net/netfilter/nf_conntrack_ftp.c99
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c98
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c32
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c7
-rw-r--r--net/netfilter/nf_conntrack_standalone.c6
-rw-r--r--net/netfilter/nf_internals.h3
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_queue.c90
-rw-r--r--net/netfilter/nf_sockopt.c1
-rw-r--r--net/netfilter/nfnetlink.c3
-rw-r--r--net/netfilter/nfnetlink_log.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c22
-rw-r--r--net/netfilter/x_tables.c270
-rw-r--r--net/netfilter/xt_CLASSIFY.c63
-rw-r--r--net/netfilter/xt_CONNMARK.c148
-rw-r--r--net/netfilter/xt_CONNSECMARK.c61
-rw-r--r--net/netfilter/xt_DSCP.c118
-rw-r--r--net/netfilter/xt_MARK.c120
-rw-r--r--net/netfilter/xt_NFQUEUE.c71
-rw-r--r--net/netfilter/xt_NOTRACK.c50
-rw-r--r--net/netfilter/xt_SECMARK.c61
-rw-r--r--net/netfilter/xt_comment.c45
-rw-r--r--net/netfilter/xt_connbytes.c48
-rw-r--r--net/netfilter/xt_connmark.c88
-rw-r--r--net/netfilter/xt_conntrack.c185
-rw-r--r--net/netfilter/xt_dccp.c52
-rw-r--r--net/netfilter/xt_dscp.c103
-rw-r--r--net/netfilter/xt_esp.c52
-rw-r--r--net/netfilter/xt_helper.c55
-rw-r--r--net/netfilter/xt_length.c43
-rw-r--r--net/netfilter/xt_limit.c111
-rw-r--r--net/netfilter/xt_mac.c52
-rw-r--r--net/netfilter/xt_mark.c80
-rw-r--r--net/netfilter/xt_multiport.c115
-rw-r--r--net/netfilter/xt_physdev.c66
-rw-r--r--net/netfilter/xt_pkttype.c56
-rw-r--r--net/netfilter/xt_policy.c53
-rw-r--r--net/netfilter/xt_quota.c55
-rw-r--r--net/netfilter/xt_sctp.c54
-rw-r--r--net/netfilter/xt_state.c56
-rw-r--r--net/netfilter/xt_statistic.c55
-rw-r--r--net/netfilter/xt_string.c61
-rw-r--r--net/netfilter/xt_tcpmss.c97
-rw-r--r--net/netfilter/xt_tcpudp.c109
-rw-r--r--net/netlabel/Kconfig17
-rw-r--r--net/netlabel/Makefile16
-rw-r--r--net/netlabel/netlabel_cipso_v4.c773
-rw-r--r--net/netlabel/netlabel_cipso_v4.h166
-rw-r--r--net/netlabel/netlabel_domainhash.c448
-rw-r--r--net/netlabel/netlabel_domainhash.h71
-rw-r--r--net/netlabel/netlabel_kapi.c254
-rw-r--r--net/netlabel/netlabel_mgmt.c648
-rw-r--r--net/netlabel/netlabel_mgmt.h171
-rw-r--r--net/netlabel/netlabel_unlabeled.c280
-rw-r--r--net/netlabel/netlabel_unlabeled.h89
-rw-r--r--net/netlabel/netlabel_user.c117
-rw-r--r--net/netlabel/netlabel_user.h96
-rw-r--r--net/netlink/af_netlink.c116
-rw-r--r--net/netlink/attr.c125
-rw-r--r--net/netlink/genetlink.c57
-rw-r--r--net/netrom/af_netrom.c30
-rw-r--r--net/netrom/nr_dev.c1
-rw-r--r--net/netrom/nr_route.c12
-rw-r--r--net/netrom/nr_timer.c2
-rw-r--r--net/packet/af_packet.c50
-rw-r--r--net/rose/af_rose.c20
-rw-r--r--net/rose/rose_dev.c6
-rw-r--r--net/rxrpc/call.c3
-rw-r--r--net/rxrpc/connection.c9
-rw-r--r--net/rxrpc/krxsecd.c3
-rw-r--r--net/rxrpc/peer.c3
-rw-r--r--net/rxrpc/rxrpc_syms.c1
-rw-r--r--net/rxrpc/sysctl.c1
-rw-r--r--net/rxrpc/transport.c6
-rw-r--r--net/sched/Kconfig8
-rw-r--r--net/sched/act_api.c285
-rw-r--r--net/sched/act_gact.c143
-rw-r--r--net/sched/act_ipt.c180
-rw-r--r--net/sched/act_mirred.c160
-rw-r--r--net/sched/act_pedit.c172
-rw-r--r--net/sched/act_police.c509
-rw-r--r--net/sched/act_simple.c184
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_basic.c9
-rw-r--r--net/sched/cls_fw.c32
-rw-r--r--net/sched/cls_route.c10
-rw-r--r--net/sched/cls_rsvp.h10
-rw-r--r--net/sched/cls_tcindex.c13
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/em_cmp.c1
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/em_nbyte.c1
-rw-r--r--net/sched/em_text.c1
-rw-r--r--net/sched/em_u32.c1
-rw-r--r--net/sched/ematch.c4
-rw-r--r--net/sched/estimator.c3
-rw-r--r--net/sched/sch_api.c19
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_blackhole.c1
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_dsmark.c1
-rw-r--r--net/sched/sch_fifo.c1
-rw-r--r--net/sched/sch_generic.c72
-rw-r--r--net/sched/sch_gred.c4
-rw-r--r--net/sched/sch_hfsc.c4
-rw-r--r--net/sched/sch_htb.c1367
-rw-r--r--net/sched/sch_ingress.c1
-rw-r--r--net/sched/sch_netem.c9
-rw-r--r--net/sched/sch_prio.c1
-rw-r--r--net/sched/sch_red.c1
-rw-r--r--net/sched/sch_sfq.c1
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sctp/associola.c27
-rw-r--r--net/sctp/bind_addr.c8
-rw-r--r--net/sctp/endpointola.c13
-rw-r--r--net/sctp/input.c18
-rw-r--r--net/sctp/inqueue.c4
-rw-r--r--net/sctp/ipv6.c10
-rw-r--r--net/sctp/output.c10
-rw-r--r--net/sctp/outqueue.c18
-rw-r--r--net/sctp/proc.c17
-rw-r--r--net/sctp/protocol.c18
-rw-r--r--net/sctp/sm_make_chunk.c91
-rw-r--r--net/sctp/sm_sideeffect.c12
-rw-r--r--net/sctp/sm_statefuns.c202
-rw-r--r--net/sctp/socket.c138
-rw-r--r--net/sctp/sysctl.c140
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/socket.c1028
-rw-r--r--net/sunrpc/auth.c12
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c52
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c95
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c29
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c8
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c9
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c34
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c337
-rw-r--r--net/sunrpc/auth_null.c10
-rw-r--r--net/sunrpc/auth_unix.c11
-rw-r--r--net/sunrpc/cache.c6
-rw-r--r--net/sunrpc/clnt.c300
-rw-r--r--net/sunrpc/pmap_clnt.c267
-rw-r--r--net/sunrpc/rpc_pipe.c122
-rw-r--r--net/sunrpc/sched.c107
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/stats.c7
-rw-r--r--net/sunrpc/sunrpc_syms.c4
-rw-r--r--net/sunrpc/svc.c61
-rw-r--r--net/sunrpc/svcauth.c4
-rw-r--r--net/sunrpc/svcauth_unix.c38
-rw-r--r--net/sunrpc/svcsock.c55
-rw-r--r--net/sunrpc/sysctl.c1
-rw-r--r--net/sunrpc/timer.c2
-rw-r--r--net/sunrpc/xdr.c81
-rw-r--r--net/sunrpc/xprt.c116
-rw-r--r--net/sunrpc/xprtsock.c166
-rw-r--r--net/sysctl_net.c1
-rw-r--r--net/tipc/bcast.c83
-rw-r--r--net/tipc/bcast.h2
-rw-r--r--net/tipc/bearer.c78
-rw-r--r--net/tipc/cluster.c28
-rw-r--r--net/tipc/config.c87
-rw-r--r--net/tipc/core.c10
-rw-r--r--net/tipc/core.h26
-rw-r--r--net/tipc/dbg.c2
-rw-r--r--net/tipc/discover.c15
-rw-r--r--net/tipc/eth_media.c29
-rw-r--r--net/tipc/handler.c2
-rw-r--r--net/tipc/link.c233
-rw-r--r--net/tipc/name_distr.c30
-rw-r--r--net/tipc/name_table.c223
-rw-r--r--net/tipc/net.c7
-rw-r--r--net/tipc/node.c104
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/node_subscr.c15
-rw-r--r--net/tipc/port.c50
-rw-r--r--net/tipc/ref.c37
-rw-r--r--net/tipc/socket.c100
-rw-r--r--net/tipc/subscr.c23
-rw-r--r--net/tipc/user_reg.c5
-rw-r--r--net/tipc/zone.c20
-rw-r--r--net/tipc/zone.h4
-rw-r--r--net/unix/af_unix.c43
-rw-r--r--net/wanrouter/af_wanpipe.c10
-rw-r--r--net/wanrouter/wanmain.c10
-rw-r--r--net/wanrouter/wanproc.c1
-rw-r--r--net/x25/af_x25.c1
-rw-r--r--net/x25/x25_dev.c1
-rw-r--r--net/x25/x25_proc.c1
-rw-r--r--net/x25/x25_route.c1
-rw-r--r--net/xfrm/Kconfig16
-rw-r--r--net/xfrm/Makefile3
-rw-r--r--net/xfrm/xfrm_algo.c95
-rw-r--r--net/xfrm/xfrm_hash.c41
-rw-r--r--net/xfrm/xfrm_hash.h128
-rw-r--r--net/xfrm/xfrm_input.c12
-rw-r--r--net/xfrm/xfrm_policy.c932
-rw-r--r--net/xfrm/xfrm_state.c670
-rw-r--r--net/xfrm/xfrm_user.c352
613 files changed, 27370 insertions, 16175 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 282c4ab1abe6..2a27e37bc4cb 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -10,7 +10,6 @@
* v 1.0 03/22/99
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ac242a4bc346..797c6d961deb 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -26,7 +26,6 @@
* Maciej W. Rozycki : IPv6 support
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/system.h>
#include <linux/types.h>
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
index 700129556c13..ead56037398b 100644
--- a/net/802/sysctl_net_802.c
+++ b/net/802/sysctl_net_802.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/if_tr.h>
#include <linux/sysctl.h>
diff --git a/net/802/tr.c b/net/802/tr.c
index e9dc803f2fe0..829deb41ce81 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -17,7 +17,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -165,7 +164,7 @@ static int tr_rebuild_header(struct sk_buff *skb)
*/
if(trllc->ethertype != htons(ETH_P_IP)) {
- printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype));
+ printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype));
return 0;
}
@@ -187,7 +186,7 @@ static int tr_rebuild_header(struct sk_buff *skb)
* it via SNAP.
*/
-unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev)
+__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
{
struct trh_hdr *trh=(struct trh_hdr *)skb->data;
@@ -230,15 +229,15 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev)
*/
if (trllc->dsap == EXTENDED_SAP &&
- (trllc->ethertype == ntohs(ETH_P_IP) ||
- trllc->ethertype == ntohs(ETH_P_IPV6) ||
- trllc->ethertype == ntohs(ETH_P_ARP)))
+ (trllc->ethertype == htons(ETH_P_IP) ||
+ trllc->ethertype == htons(ETH_P_IPV6) ||
+ trllc->ethertype == htons(ETH_P_ARP)))
{
skb_pull(skb, sizeof(struct trllc));
return trllc->ethertype;
}
- return ntohs(ETH_P_TR_802_2);
+ return htons(ETH_P_TR_802_2);
}
/*
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3948949a609a..18fcb9fa518d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -67,10 +67,6 @@ static struct packet_type vlan_packet_type = {
.func = vlan_skb_recv, /* VLAN receive method */
};
-/* Bits of netdev state that are propagated from real device to virtual */
-#define VLAN_LINK_STATE_MASK \
- ((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)|(1<<__LINK_STATE_DORMANT))
-
/* End of global variables definitions. */
/*
@@ -364,6 +360,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
}
}
+/*
+ * vlan network devices have devices nesting below it, and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key vlan_netdev_xmit_lock_key;
+
+
/* Attach a VLAN device to a mac address (ie Ethernet Card).
* Returns the device that was created, or NULL if there was
* an error of some kind.
@@ -460,6 +464,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
vlan_setup);
+
if (new_dev == NULL)
goto out_unlock;
@@ -470,7 +475,9 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
new_dev->flags = real_dev->flags;
new_dev->flags &= ~IFF_UP;
- new_dev->state = real_dev->state & ~(1<<__LINK_STATE_START);
+ new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+ (1<<__LINK_STATE_DORMANT))) |
+ (1<<__LINK_STATE_PRESENT);
/* need 4 bytes for extra VLAN header info,
* hope the underlying device can handle it.
@@ -518,6 +525,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
if (register_netdevice(new_dev))
goto out_free_newdev;
+ lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+
new_dev->iflink = real_dev->ifindex;
vlan_transfer_operstate(real_dev, new_dev);
linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
@@ -531,12 +540,11 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
* so it cannot "appear" on us.
*/
if (!grp) { /* need to add a new group */
- grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL);
+ grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
if (!grp)
goto out_free_unregister;
/* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */
- memset(grp, 0, sizeof(struct vlan_group));
grp->real_dev_ifindex = real_dev->ifindex;
hlist_add_head_rcu(&grp->hlist,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 7b214cffc956..a8fc0de1f969 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -17,7 +17,6 @@
* Jan 20, 1998 Ben Greear Initial Version
*****************************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/stddef.h> /* offsetof(), etc. */
#include <linux/errno.h> /* return codes */
diff --git a/net/Kconfig b/net/Kconfig
index c6cec5aa5486..a81aca43932f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -177,7 +177,7 @@ source "net/lapb/Kconfig"
config NET_DIVERT
bool "Frame Diverter (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && BROKEN
---help---
The Frame Diverter allows you to divert packets from the
network, that are not aimed at the interface receiving it (in
@@ -231,7 +231,7 @@ config NET_TCPPROBE
TCP congestion avoidance modules. If you don't understand
what was just said, you don't need it: say N.
- Documentation on how to use the packet generator can be found
+ Documentation on how to use TCP connection probing can be found
at http://linux-net.osdl.org/index.php/TcpProbe
To compile this code as a module, choose M here: the
@@ -249,6 +249,11 @@ source "net/ieee80211/Kconfig"
config WIRELESS_EXT
bool
+source "net/netlabel/Kconfig"
+
+config FIB_RULES
+ bool
+
endif # if NET
endmenu # Networking
diff --git a/net/Makefile b/net/Makefile
index 065796f5fb17..ad4d14f4bb29 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
obj-$(CONFIG_IEEE80211) += ieee80211/
obj-$(CONFIG_TIPC) += tipc/
+obj-$(CONFIG_NETLABEL) += netlabel/
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 7076097debc2..f3777ec5bcb9 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -29,7 +29,6 @@
*
*/
-#include <linux/config.h>
#include <linux/if_arp.h>
#include <net/sock.h>
#include <net/datalink.h>
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index dc4048dd98c1..7ae4916cd26d 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -8,7 +8,6 @@
* Free Software Foundation, version 2.
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7b1eb9a4fc96..708e2e0371af 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -51,7 +51,6 @@
*
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/if_arp.h>
@@ -228,12 +227,11 @@ static void atif_drop_device(struct net_device *dev)
static struct atalk_iface *atif_add_device(struct net_device *dev,
struct atalk_addr *sa)
{
- struct atalk_iface *iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+ struct atalk_iface *iface = kzalloc(sizeof(*iface), GFP_KERNEL);
if (!iface)
goto out;
- memset(iface, 0, sizeof(*iface));
dev_hold(dev);
iface->dev = dev;
dev->atalk_ptr = iface;
@@ -560,12 +558,11 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint)
}
if (!rt) {
- rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
+ rt = kzalloc(sizeof(*rt), GFP_ATOMIC);
retval = -ENOBUFS;
if (!rt)
goto out_unlock;
- memset(rt, 0, sizeof(*rt));
rt->next = atalk_routes;
atalk_routes = rt;
@@ -1005,7 +1002,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
return sum;
}
-static unsigned short atalk_checksum(const struct sk_buff *skb, int len)
+static __be16 atalk_checksum(const struct sk_buff *skb, int len)
{
unsigned long sum;
@@ -1013,7 +1010,7 @@ static unsigned short atalk_checksum(const struct sk_buff *skb, int len)
sum = atalk_sum_skb(skb, 4, len-4, 0);
/* Use 0xFFFF for 0. 0 itself means none */
- return sum ? htons((unsigned short)sum) : 0xFFFF;
+ return sum ? htons((unsigned short)sum) : htons(0xFFFF);
}
static struct proto ddp_proto = {
@@ -1292,7 +1289,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
#endif
static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
- struct ddpehdr *ddp, struct ddpebits *ddphv,
+ struct ddpehdr *ddp, __u16 len_hops,
int origlen)
{
struct atalk_route *rt;
@@ -1320,10 +1317,12 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
/* Route the packet */
rt = atrtr_find(&ta);
- if (!rt || ddphv->deh_hops == DDP_MAXHOPS)
+ /* increment hops count */
+ len_hops += 1 << 10;
+ if (!rt || !(len_hops & (15 << 10)))
goto free_it;
+
/* FIXME: use skb->cb to be able to use shared skbs */
- ddphv->deh_hops++;
/*
* Route goes through another gateway, so set the target to the
@@ -1338,11 +1337,10 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
/* Fix up skb->len field */
skb_trim(skb, min_t(unsigned int, origlen,
(rt->dev->hard_header_len +
- ddp_dl->header_length + ddphv->deh_len)));
+ ddp_dl->header_length + (len_hops & 1023))));
- /* Mend the byte order */
/* FIXME: use skb->cb to be able to use shared skbs */
- *((__u16 *)ddp) = ntohs(*((__u16 *)ddphv));
+ ddp->deh_len_hops = htons(len_hops);
/*
* Send the buffer onwards
@@ -1397,7 +1395,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
struct atalk_iface *atif;
struct sockaddr_at tosat;
int origlen;
- struct ddpebits ddphv;
+ __u16 len_hops;
/* Don't mangle buffer if shared */
if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
@@ -1409,16 +1407,11 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
ddp = ddp_hdr(skb);
- /*
- * Fix up the length field [Ok this is horrible but otherwise
- * I end up with unions of bit fields and messy bit field order
- * compiler/endian dependencies..]
- */
- *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+ len_hops = ntohs(ddp->deh_len_hops);
/* Trim buffer in case of stray trailing data */
origlen = skb->len;
- skb_trim(skb, min_t(unsigned int, skb->len, ddphv.deh_len));
+ skb_trim(skb, min_t(unsigned int, skb->len, len_hops & 1023));
/*
* Size check to see if ddp->deh_len was crap
@@ -1433,7 +1426,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
* valid for net byte orders all over the networking code...
*/
if (ddp->deh_sum &&
- atalk_checksum(skb, ddphv.deh_len) != ddp->deh_sum)
+ atalk_checksum(skb, len_hops & 1023) != ddp->deh_sum)
/* Not a valid AppleTalk frame - dustbin time */
goto freeit;
@@ -1447,7 +1440,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
/* Not ours, so we route the packet via the correct
* AppleTalk iface
*/
- atalk_route_packet(skb, dev, ddp, &ddphv, origlen);
+ atalk_route_packet(skb, dev, ddp, len_hops, origlen);
goto out;
}
@@ -1492,7 +1485,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
/* Find our address */
struct atalk_addr *ap = atalk_find_dev_addr(dev);
- if (!ap || skb->len < sizeof(struct ddpshdr))
+ if (!ap || skb->len < sizeof(__be16) || skb->len > 1023)
goto freeit;
/* Don't mangle buffer if shared */
@@ -1522,11 +1515,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
/*
* Not sure about this bit...
*/
- ddp->deh_len = skb->len;
- ddp->deh_hops = DDP_MAXHOPS; /* Non routable, so force a drop
- if we slip up later */
- /* Mend the byte order */
- *((__u16 *)ddp) = htons(*((__u16 *)ddp));
+ /* Non routable, so force a drop if we slip up later */
+ ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
}
skb->h.raw = skb->data;
@@ -1625,16 +1615,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr));
- ddp->deh_pad = 0;
- ddp->deh_hops = 0;
- ddp->deh_len = len + sizeof(*ddp);
- /*
- * Fix up the length field [Ok this is horrible but otherwise
- * I end up with unions of bit fields and messy bit field order
- * compiler/endian dependencies..
- */
- *((__u16 *)ddp) = ntohs(*((__u16 *)ddp));
-
+ ddp->deh_len_hops = htons(len + sizeof(*ddp));
ddp->deh_dnet = usat->sat_addr.s_net;
ddp->deh_snet = at->src_net;
ddp->deh_dnode = usat->sat_addr.s_node;
@@ -1715,8 +1696,8 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
struct ddpehdr *ddp;
int copied = 0;
+ int offset = 0;
int err = 0;
- struct ddpebits ddphv;
struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
if (!skb)
@@ -1724,25 +1705,18 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
/* FIXME: use skb->cb to be able to use shared skbs */
ddp = ddp_hdr(skb);
- *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+ copied = ntohs(ddp->deh_len_hops) & 1023;
- if (sk->sk_type == SOCK_RAW) {
- copied = ddphv.deh_len;
- if (copied > size) {
- copied = size;
- msg->msg_flags |= MSG_TRUNC;
- }
+ if (sk->sk_type != SOCK_RAW) {
+ offset = sizeof(*ddp);
+ copied -= offset;
+ }
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
- } else {
- copied = ddphv.deh_len - sizeof(*ddp);
- if (copied > size) {
- copied = size;
- msg->msg_flags |= MSG_TRUNC;
- }
- err = skb_copy_datagram_iovec(skb, sizeof(*ddp),
- msg->msg_iov, copied);
+ if (copied > size) {
+ copied = size;
+ msg->msg_flags |= MSG_TRUNC;
}
+ err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
if (!err) {
if (sat) {
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index af7f0604395d..40b0af7437a2 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -6,7 +6,6 @@
* Dynamic registration, added aarp entries. (5/30/97 Chris Horn)
*/
-#include <linux/config.h>
#include <linux/sysctl.h>
#include <net/sock.h>
#include <linux/atalk.h>
diff --git a/net/atm/Makefile b/net/atm/Makefile
index d5818751f6ba..89656d6c0b90 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -2,7 +2,7 @@
# Makefile for the ATM Protocol Families.
#
-atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o
+atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o atm_sysfs.o
mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o
obj-$(CONFIG_ATM) += atm.o
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
new file mode 100644
index 000000000000..c0a4ae28fcfa
--- /dev/null
+++ b/net/atm/atm_sysfs.c
@@ -0,0 +1,175 @@
+/* ATM driver model support. */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/atmdev.h>
+#include "common.h"
+#include "resources.h"
+
+#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
+
+static ssize_t show_type(struct class_device *cdev, char *buf)
+{
+ struct atm_dev *adev = to_atm_dev(cdev);
+ return sprintf(buf, "%s\n", adev->type);
+}
+
+static ssize_t show_address(struct class_device *cdev, char *buf)
+{
+ char *pos = buf;
+ struct atm_dev *adev = to_atm_dev(cdev);
+ int i;
+
+ for (i = 0; i < (ESI_LEN - 1); i++)
+ pos += sprintf(pos, "%02x:", adev->esi[i]);
+ pos += sprintf(pos, "%02x\n", adev->esi[i]);
+
+ return pos - buf;
+}
+
+static ssize_t show_atmaddress(struct class_device *cdev, char *buf)
+{
+ unsigned long flags;
+ char *pos = buf;
+ struct atm_dev *adev = to_atm_dev(cdev);
+ struct atm_dev_addr *aaddr;
+ int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin;
+ int i, j;
+
+ spin_lock_irqsave(&adev->lock, flags);
+ list_for_each_entry(aaddr, &adev->local, entry) {
+ for(i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) {
+ if (j == *fmt) {
+ pos += sprintf(pos, ".");
+ ++fmt;
+ j = 0;
+ }
+ pos += sprintf(pos, "%02x", aaddr->addr.sas_addr.prv[i]);
+ }
+ pos += sprintf(pos, "\n");
+ }
+ spin_unlock_irqrestore(&adev->lock, flags);
+
+ return pos - buf;
+}
+
+static ssize_t show_carrier(struct class_device *cdev, char *buf)
+{
+ char *pos = buf;
+ struct atm_dev *adev = to_atm_dev(cdev);
+
+ pos += sprintf(pos, "%d\n",
+ adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
+
+ return pos - buf;
+}
+
+static ssize_t show_link_rate(struct class_device *cdev, char *buf)
+{
+ char *pos = buf;
+ struct atm_dev *adev = to_atm_dev(cdev);
+ int link_rate;
+
+ /* show the link rate, not the data rate */
+ switch (adev->link_rate) {
+ case ATM_OC3_PCR:
+ link_rate = 155520000;
+ break;
+ case ATM_OC12_PCR:
+ link_rate = 622080000;
+ break;
+ case ATM_25_PCR:
+ link_rate = 25600000;
+ break;
+ default:
+ link_rate = adev->link_rate * 8 * 53;
+ }
+ pos += sprintf(pos, "%d\n", link_rate);
+
+ return pos - buf;
+}
+
+static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
+static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
+static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
+static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+
+static struct class_device_attribute *atm_attrs[] = {
+ &class_device_attr_atmaddress,
+ &class_device_attr_address,
+ &class_device_attr_carrier,
+ &class_device_attr_type,
+ &class_device_attr_link_rate,
+ NULL
+};
+
+static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+{
+ struct atm_dev *adev;
+ int i = 0, len = 0;
+
+ if (!cdev)
+ return -ENODEV;
+
+ adev = to_atm_dev(cdev);
+ if (!adev)
+ return -ENODEV;
+
+ if (add_uevent_var(envp, num_envp, &i, buf, size, &len,
+ "NAME=%s%d", adev->type, adev->number))
+ return -ENOMEM;
+
+ envp[i] = NULL;
+ return 0;
+}
+
+static void atm_release(struct class_device *cdev)
+{
+ struct atm_dev *adev = to_atm_dev(cdev);
+
+ kfree(adev);
+}
+
+static struct class atm_class = {
+ .name = "atm",
+ .release = atm_release,
+ .uevent = atm_uevent,
+};
+
+int atm_register_sysfs(struct atm_dev *adev)
+{
+ struct class_device *cdev = &adev->class_dev;
+ int i, err;
+
+ cdev->class = &atm_class;
+ class_set_devdata(cdev, adev);
+
+ snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
+ err = class_device_register(cdev);
+ if (err < 0)
+ return err;
+
+ for (i = 0; atm_attrs[i]; i++)
+ class_device_create_file(cdev, atm_attrs[i]);
+
+ return 0;
+}
+
+void atm_unregister_sysfs(struct atm_dev *adev)
+{
+ struct class_device *cdev = &adev->class_dev;
+
+ class_device_del(cdev);
+}
+
+int __init atm_sysfs_init(void)
+{
+ return class_register(&atm_class);
+}
+
+void __exit atm_sysfs_exit(void)
+{
+ class_unregister(&atm_class);
+}
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 680ccb12aae8..d00cca97eb33 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -5,7 +5,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
@@ -509,10 +508,9 @@ Note: we do not have explicit unassign, but look at _push()
if (copy_from_user(&be, arg, sizeof be))
return -EFAULT;
- brvcc = kmalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
+ brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
if (!brvcc)
return -ENOMEM;
- memset(brvcc, 0, sizeof(struct br2684_vcc));
write_lock_irq(&devs_lock);
net_dev = br2684_find_dev(&be.ifspec);
if (net_dev == NULL) {
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f92f9c94d2c7..7af2c411da82 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -2,7 +2,6 @@
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
-#include <linux/config.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h> /* for UINT_MAX */
@@ -24,6 +23,7 @@
#include <linux/if.h> /* for IFF_UP */
#include <linux/inetdevice.h>
#include <linux/bitops.h>
+#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
@@ -267,7 +267,7 @@ static void clip_neigh_destroy(struct neighbour *neigh)
DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh);
if (NEIGH2ENTRY(neigh)->vccs)
printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n");
- NEIGH2ENTRY(neigh)->vccs = (void *) 0xdeadbeef;
+ NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD;
}
static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
@@ -500,9 +500,11 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
} else {
unsigned int len = skb->len;
+ skb_get(skb);
clip_push(vcc, skb);
PRIV(skb->dev)->stats.rx_packets--;
PRIV(skb->dev)->stats.rx_bytes -= len;
+ kfree_skb(skb);
}
return 0;
}
@@ -929,12 +931,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
struct seq_file *seq;
int rc = -EAGAIN;
- state = kmalloc(sizeof(*state), GFP_KERNEL);
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
rc = -ENOMEM;
goto out_kfree;
}
- memset(state, 0, sizeof(*state));
state->ns.neigh_sub_iter = clip_seq_sub_iter;
rc = seq_open(file, &arp_seq_ops);
@@ -962,7 +963,6 @@ static struct file_operations arp_seq_fops = {
static int __init atm_clip_init(void)
{
- struct proc_dir_entry *p;
neigh_table_init_no_netlink(&clip_tbl);
clip_tbl_hook = &clip_tbl;
@@ -972,9 +972,15 @@ static int __init atm_clip_init(void)
setup_timer(&idle_timer, idle_timer_check, 0);
- p = create_proc_entry("arp", S_IRUGO, atm_proc_root);
- if (p)
- p->proc_fops = &arp_seq_fops;
+#ifdef CONFIG_PROC_FS
+ {
+ struct proc_dir_entry *p;
+
+ p = create_proc_entry("arp", S_IRUGO, atm_proc_root);
+ if (p)
+ p->proc_fops = &arp_seq_fops;
+ }
+#endif
return 0;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index ae002220fa99..fbabff494468 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -3,7 +3,6 @@
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/net.h> /* struct socket, struct proto_ops */
@@ -791,8 +790,14 @@ static int __init atm_init(void)
printk(KERN_ERR "atm_proc_init() failed with %d\n",error);
goto out_atmsvc_exit;
}
+ if ((error = atm_sysfs_init()) < 0) {
+ printk(KERN_ERR "atm_sysfs_init() failed with %d\n",error);
+ goto out_atmproc_exit;
+ }
out:
return error;
+out_atmproc_exit:
+ atm_proc_exit();
out_atmsvc_exit:
atmsvc_exit();
out_atmpvc_exit:
@@ -805,6 +810,7 @@ out_unregister_vcc_proto:
static void __exit atm_exit(void)
{
atm_proc_exit();
+ atm_sysfs_exit();
atmsvc_exit();
atmpvc_exit();
proto_unregister(&vcc_proto);
diff --git a/net/atm/common.h b/net/atm/common.h
index 4887c317cefe..a422da7788fb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -28,6 +28,8 @@ int atmpvc_init(void);
void atmpvc_exit(void);
int atmsvc_init(void);
void atmsvc_exit(void);
+int atm_sysfs_init(void);
+void atm_sysfs_exit(void);
#ifdef CONFIG_PROC_FS
int atm_proc_init(void);
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 851cfa6312af..8c2022c3e81d 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -4,7 +4,6 @@
/* 2003 John Levon <levon@movementarian.org> */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/net.h> /* struct socket, struct proto_ops */
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c
index 4b1faca5013f..1d3de42fada0 100644
--- a/net/atm/ipcommon.c
+++ b/net/atm/ipcommon.c
@@ -25,22 +25,27 @@
/*
* skb_migrate appends the list at "from" to "to", emptying "from" in the
* process. skb_migrate is atomic with respect to all other skb operations on
- * "from" and "to". Note that it locks both lists at the same time, so beware
- * of potential deadlocks.
+ * "from" and "to". Note that it locks both lists at the same time, so to deal
+ * with the lock ordering, the locks are taken in address order.
*
* This function should live in skbuff.c or skbuff.h.
*/
-void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
+void skb_migrate(struct sk_buff_head *from, struct sk_buff_head *to)
{
unsigned long flags;
struct sk_buff *skb_from = (struct sk_buff *) from;
struct sk_buff *skb_to = (struct sk_buff *) to;
struct sk_buff *prev;
- spin_lock_irqsave(&from->lock,flags);
- spin_lock(&to->lock);
+ if ((unsigned long) from < (unsigned long) to) {
+ spin_lock_irqsave(&from->lock, flags);
+ spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irqsave(&to->lock, flags);
+ spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING);
+ }
prev = from->prev;
from->next->prev = to->prev;
prev->next = skb_to;
@@ -51,7 +56,7 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
from->prev = skb_from;
from->next = skb_from;
from->qlen = 0;
- spin_unlock_irqrestore(&from->lock,flags);
+ spin_unlock_irqrestore(&from->lock, flags);
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index c4fc722fef9a..66c57c1091a8 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1,10 +1,9 @@
/*
* lec.c: Lan Emulation driver
- * Marko Kiiskila mkiiskila@yahoo.com
*
+ * Marko Kiiskila <mkiiskila@yahoo.com>
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/capability.h>
@@ -39,7 +38,7 @@
#include <linux/if_bridge.h>
#include "../bridge/br_private.h"
-static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00};
+static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
#endif
/* Modular too */
@@ -56,38 +55,41 @@ static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00};
#define DPRINTK(format,args...)
#endif
-#define DUMP_PACKETS 0 /* 0 = None,
- * 1 = 30 first bytes
- * 2 = Whole packet
- */
+#define DUMP_PACKETS 0 /*
+ * 0 = None,
+ * 1 = 30 first bytes
+ * 2 = Whole packet
+ */
-#define LEC_UNRES_QUE_LEN 8 /* number of tx packets to queue for a
- single destination while waiting for SVC */
+#define LEC_UNRES_QUE_LEN 8 /*
+ * number of tx packets to queue for a
+ * single destination while waiting for SVC
+ */
static int lec_open(struct net_device *dev);
static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev);
static int lec_close(struct net_device *dev);
static struct net_device_stats *lec_get_stats(struct net_device *dev);
static void lec_init(struct net_device *dev);
-static struct lec_arp_table* lec_arp_find(struct lec_priv *priv,
- unsigned char *mac_addr);
+static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
+ unsigned char *mac_addr);
static int lec_arp_remove(struct lec_priv *priv,
- struct lec_arp_table *to_remove);
+ struct lec_arp_table *to_remove);
/* LANE2 functions */
-static void lane2_associate_ind (struct net_device *dev, u8 *mac_address,
- u8 *tlvs, u32 sizeoftlvs);
+static void lane2_associate_ind(struct net_device *dev, u8 *mac_address,
+ u8 *tlvs, u32 sizeoftlvs);
static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
- u8 **tlvs, u32 *sizeoftlvs);
-static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs);
+ u8 **tlvs, u32 *sizeoftlvs);
+static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
+ u8 *tlvs, u32 sizeoftlvs);
-static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
unsigned long permanent);
static void lec_arp_check_empties(struct lec_priv *priv,
struct atm_vcc *vcc, struct sk_buff *skb);
static void lec_arp_destroy(struct lec_priv *priv);
static void lec_arp_init(struct lec_priv *priv);
-static struct atm_vcc* lec_arp_resolve(struct lec_priv *priv,
+static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
unsigned char *mac_to_find,
int is_rdesc,
struct lec_arp_table **ret_entry);
@@ -101,16 +103,30 @@ static void lec_set_flush_tran_id(struct lec_priv *priv,
unsigned long tran_id);
static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
struct atm_vcc *vcc,
- void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb));
+ void (*old_push) (struct atm_vcc *vcc,
+ struct sk_buff *skb));
static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc);
+/* must be done under lec_arp_lock */
+static inline void lec_arp_hold(struct lec_arp_table *entry)
+{
+ atomic_inc(&entry->usage);
+}
+
+static inline void lec_arp_put(struct lec_arp_table *entry)
+{
+ if (atomic_dec_and_test(&entry->usage))
+ kfree(entry);
+}
+
+
static struct lane2_ops lane2_ops = {
- lane2_resolve, /* resolve, spec 3.1.3 */
- lane2_associate_req, /* associate_req, spec 3.1.4 */
- NULL /* associate indicator, spec 3.1.5 */
+ lane2_resolve, /* resolve, spec 3.1.3 */
+ lane2_associate_req, /* associate_req, spec 3.1.4 */
+ NULL /* associate indicator, spec 3.1.5 */
};
-static unsigned char bus_mac[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
+static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* Device structures */
static struct net_device *dev_lec[MAX_LEC_ITF];
@@ -118,36 +134,39 @@ static struct net_device *dev_lec[MAX_LEC_ITF];
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
{
- struct ethhdr *eth;
- char *buff;
- struct lec_priv *priv;
-
- /* Check if this is a BPDU. If so, ask zeppelin to send
- * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit
- * as the Config BPDU has */
- eth = (struct ethhdr *)skb->data;
- buff = skb->data + skb->dev->hard_header_len;
- if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) {
+ struct ethhdr *eth;
+ char *buff;
+ struct lec_priv *priv;
+
+ /*
+ * Check if this is a BPDU. If so, ask zeppelin to send
+ * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit
+ * as the Config BPDU has
+ */
+ eth = (struct ethhdr *)skb->data;
+ buff = skb->data + skb->dev->hard_header_len;
+ if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) {
struct sock *sk;
- struct sk_buff *skb2;
- struct atmlec_msg *mesg;
-
- skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
- if (skb2 == NULL) return;
- skb2->len = sizeof(struct atmlec_msg);
- mesg = (struct atmlec_msg *)skb2->data;
- mesg->type = l_topology_change;
- buff += 4;
- mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */
-
- priv = (struct lec_priv *)dev->priv;
- atm_force_charge(priv->lecd, skb2->truesize);
+ struct sk_buff *skb2;
+ struct atmlec_msg *mesg;
+
+ skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
+ if (skb2 == NULL)
+ return;
+ skb2->len = sizeof(struct atmlec_msg);
+ mesg = (struct atmlec_msg *)skb2->data;
+ mesg->type = l_topology_change;
+ buff += 4;
+ mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */
+
+ priv = (struct lec_priv *)dev->priv;
+ atm_force_charge(priv->lecd, skb2->truesize);
sk = sk_atm(priv->lecd);
- skb_queue_tail(&sk->sk_receive_queue, skb2);
- sk->sk_data_ready(sk, skb2->len);
- }
+ skb_queue_tail(&sk->sk_receive_queue, skb2);
+ sk->sk_data_ready(sk, skb2->len);
+ }
- return;
+ return;
}
#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
@@ -163,36 +182,35 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
#ifdef CONFIG_TR
static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
{
- struct trh_hdr *trh;
- int riflen, num_rdsc;
-
- trh = (struct trh_hdr *)packet;
- if (trh->daddr[0] & (uint8_t)0x80)
- return bus_mac; /* multicast */
-
- if (trh->saddr[0] & TR_RII) {
- riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
- if ((ntohs(trh->rcf) >> 13) != 0)
- return bus_mac; /* ARE or STE */
- }
- else
- return trh->daddr; /* not source routed */
-
- if (riflen < 6)
- return trh->daddr; /* last hop, source routed */
-
- /* riflen is 6 or more, packet has more than one route descriptor */
- num_rdsc = (riflen/2) - 1;
- memset(rdesc, 0, ETH_ALEN);
- /* offset 4 comes from LAN destination field in LE control frames */
- if (trh->rcf & htons((uint16_t)TR_RCF_DIR_BIT))
- memcpy(&rdesc[4], &trh->rseg[num_rdsc-2], sizeof(uint16_t));
- else {
- memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t));
- rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0));
- }
-
- return NULL;
+ struct trh_hdr *trh;
+ int riflen, num_rdsc;
+
+ trh = (struct trh_hdr *)packet;
+ if (trh->daddr[0] & (uint8_t) 0x80)
+ return bus_mac; /* multicast */
+
+ if (trh->saddr[0] & TR_RII) {
+ riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
+ if ((ntohs(trh->rcf) >> 13) != 0)
+ return bus_mac; /* ARE or STE */
+ } else
+ return trh->daddr; /* not source routed */
+
+ if (riflen < 6)
+ return trh->daddr; /* last hop, source routed */
+
+ /* riflen is 6 or more, packet has more than one route descriptor */
+ num_rdsc = (riflen / 2) - 1;
+ memset(rdesc, 0, ETH_ALEN);
+ /* offset 4 comes from LAN destination field in LE control frames */
+ if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT))
+ memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(uint16_t));
+ else {
+ memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t));
+ rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0));
+ }
+
+ return NULL;
}
#endif /* CONFIG_TR */
@@ -205,15 +223,14 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
* there is non-reboot way to recover if something goes wrong.
*/
-static int
-lec_open(struct net_device *dev)
+static int lec_open(struct net_device *dev)
{
- struct lec_priv *priv = (struct lec_priv *)dev->priv;
-
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
+
netif_start_queue(dev);
- memset(&priv->stats,0,sizeof(struct net_device_stats));
-
- return 0;
+ memset(&priv->stats, 0, sizeof(struct net_device_stats));
+
+ return 0;
}
static __inline__ void
@@ -232,160 +249,166 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv)
priv->stats.tx_bytes += skb->len;
}
-static void
-lec_tx_timeout(struct net_device *dev)
+static void lec_tx_timeout(struct net_device *dev)
{
printk(KERN_INFO "%s: tx timeout\n", dev->name);
dev->trans_start = jiffies;
netif_wake_queue(dev);
}
-static int
-lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct sk_buff *skb2;
- struct lec_priv *priv = (struct lec_priv *)dev->priv;
- struct lecdatahdr_8023 *lec_h;
- struct atm_vcc *vcc;
+ struct sk_buff *skb2;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
+ struct lecdatahdr_8023 *lec_h;
+ struct atm_vcc *vcc;
struct lec_arp_table *entry;
- unsigned char *dst;
+ unsigned char *dst;
int min_frame_size;
#ifdef CONFIG_TR
- unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */
+ unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */
#endif
- int is_rdesc;
+ int is_rdesc;
#if DUMP_PACKETS > 0
- char buf[300];
- int i=0;
+ char buf[300];
+ int i = 0;
#endif /* DUMP_PACKETS >0 */
-
- DPRINTK("lec_start_xmit called\n");
- if (!priv->lecd) {
- printk("%s:No lecd attached\n",dev->name);
- priv->stats.tx_errors++;
- netif_stop_queue(dev);
- return -EUNATCH;
- }
-
- DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
- (long)skb->head, (long)skb->data, (long)skb->tail,
- (long)skb->end);
+
+ DPRINTK("lec_start_xmit called\n");
+ if (!priv->lecd) {
+ printk("%s:No lecd attached\n", dev->name);
+ priv->stats.tx_errors++;
+ netif_stop_queue(dev);
+ return -EUNATCH;
+ }
+
+ DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
+ (long)skb->head, (long)skb->data, (long)skb->tail,
+ (long)skb->end);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
- if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
- lec_handle_bridge(skb, dev);
+ if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
+ lec_handle_bridge(skb, dev);
#endif
- /* Make sure we have room for lec_id */
- if (skb_headroom(skb) < 2) {
+ /* Make sure we have room for lec_id */
+ if (skb_headroom(skb) < 2) {
- DPRINTK("lec_start_xmit: reallocating skb\n");
- skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
- kfree_skb(skb);
- if (skb2 == NULL) return 0;
- skb = skb2;
- }
- skb_push(skb, 2);
+ DPRINTK("lec_start_xmit: reallocating skb\n");
+ skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
+ kfree_skb(skb);
+ if (skb2 == NULL)
+ return 0;
+ skb = skb2;
+ }
+ skb_push(skb, 2);
- /* Put le header to place, works for TokenRing too */
- lec_h = (struct lecdatahdr_8023*)skb->data;
- lec_h->le_header = htons(priv->lecid);
+ /* Put le header to place, works for TokenRing too */
+ lec_h = (struct lecdatahdr_8023 *)skb->data;
+ lec_h->le_header = htons(priv->lecid);
#ifdef CONFIG_TR
- /* Ugly. Use this to realign Token Ring packets for
- * e.g. PCA-200E driver. */
- if (priv->is_trdev) {
- skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
- kfree_skb(skb);
- if (skb2 == NULL) return 0;
- skb = skb2;
- }
+ /*
+ * Ugly. Use this to realign Token Ring packets for
+ * e.g. PCA-200E driver.
+ */
+ if (priv->is_trdev) {
+ skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
+ kfree_skb(skb);
+ if (skb2 == NULL)
+ return 0;
+ skb = skb2;
+ }
#endif
#if DUMP_PACKETS > 0
- printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name,
- skb->len, priv->lecid);
+ printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name,
+ skb->len, priv->lecid);
#if DUMP_PACKETS >= 2
- for(i=0;i<skb->len && i <99;i++) {
- sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]);
- }
+ for (i = 0; i < skb->len && i < 99; i++) {
+ sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]);
+ }
#elif DUMP_PACKETS >= 1
- for(i=0;i<skb->len && i < 30;i++) {
- sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]);
- }
+ for (i = 0; i < skb->len && i < 30; i++) {
+ sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]);
+ }
#endif /* DUMP_PACKETS >= 1 */
- if (i==skb->len)
- printk("%s\n",buf);
- else
- printk("%s...\n",buf);
+ if (i == skb->len)
+ printk("%s\n", buf);
+ else
+ printk("%s...\n", buf);
#endif /* DUMP_PACKETS > 0 */
- /* Minimum ethernet-frame size */
+ /* Minimum ethernet-frame size */
#ifdef CONFIG_TR
- if (priv->is_trdev)
- min_frame_size = LEC_MINIMUM_8025_SIZE;
+ if (priv->is_trdev)
+ min_frame_size = LEC_MINIMUM_8025_SIZE;
else
#endif
- min_frame_size = LEC_MINIMUM_8023_SIZE;
- if (skb->len < min_frame_size) {
- if ((skb->len + skb_tailroom(skb)) < min_frame_size) {
- skb2 = skb_copy_expand(skb, 0,
- min_frame_size - skb->truesize, GFP_ATOMIC);
- dev_kfree_skb(skb);
- if (skb2 == NULL) {
- priv->stats.tx_dropped++;
- return 0;
- }
- skb = skb2;
- }
+ min_frame_size = LEC_MINIMUM_8023_SIZE;
+ if (skb->len < min_frame_size) {
+ if ((skb->len + skb_tailroom(skb)) < min_frame_size) {
+ skb2 = skb_copy_expand(skb, 0,
+ min_frame_size - skb->truesize,
+ GFP_ATOMIC);
+ dev_kfree_skb(skb);
+ if (skb2 == NULL) {
+ priv->stats.tx_dropped++;
+ return 0;
+ }
+ skb = skb2;
+ }
skb_put(skb, min_frame_size - skb->len);
- }
-
- /* Send to right vcc */
- is_rdesc = 0;
- dst = lec_h->h_dest;
+ }
+
+ /* Send to right vcc */
+ is_rdesc = 0;
+ dst = lec_h->h_dest;
#ifdef CONFIG_TR
- if (priv->is_trdev) {
- dst = get_tr_dst(skb->data+2, rdesc);
- if (dst == NULL) {
- dst = rdesc;
- is_rdesc = 1;
- }
- }
+ if (priv->is_trdev) {
+ dst = get_tr_dst(skb->data + 2, rdesc);
+ if (dst == NULL) {
+ dst = rdesc;
+ is_rdesc = 1;
+ }
+ }
#endif
- entry = NULL;
- vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry);
- DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name,
- vcc, vcc?vcc->flags:0, entry);
- if (!vcc || !test_bit(ATM_VF_READY,&vcc->flags)) {
- if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) {
- DPRINTK("%s:lec_start_xmit: queuing packet, ", dev->name);
- DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
- lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
- lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
- skb_queue_tail(&entry->tx_wait, skb);
- } else {
- DPRINTK("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name);
- DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
- lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
- lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
- priv->stats.tx_dropped++;
- dev_kfree_skb(skb);
- }
- return 0;
- }
-
-#if DUMP_PACKETS > 0
- printk("%s:sending to vpi:%d vci:%d\n", dev->name,
- vcc->vpi, vcc->vci);
+ entry = NULL;
+ vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry);
+ DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name,
+ vcc, vcc ? vcc->flags : 0, entry);
+ if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) {
+ if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) {
+ DPRINTK("%s:lec_start_xmit: queuing packet, ",
+ dev->name);
+ DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+ lec_h->h_dest[0], lec_h->h_dest[1],
+ lec_h->h_dest[2], lec_h->h_dest[3],
+ lec_h->h_dest[4], lec_h->h_dest[5]);
+ skb_queue_tail(&entry->tx_wait, skb);
+ } else {
+ DPRINTK
+ ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ",
+ dev->name);
+ DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+ lec_h->h_dest[0], lec_h->h_dest[1],
+ lec_h->h_dest[2], lec_h->h_dest[3],
+ lec_h->h_dest[4], lec_h->h_dest[5]);
+ priv->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ }
+ goto out;
+ }
+#if DUMP_PACKETS > 0
+ printk("%s:sending to vpi:%d vci:%d\n", dev->name, vcc->vpi, vcc->vci);
#endif /* DUMP_PACKETS > 0 */
-
- while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) {
- DPRINTK("lec.c: emptying tx queue, ");
- DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
- lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
- lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
+
+ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) {
+ DPRINTK("lec.c: emptying tx queue, ");
+ DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+ lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
+ lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
lec_send(vcc, skb2, priv);
- }
+ }
lec_send(vcc, skb, priv);
@@ -405,210 +428,219 @@ lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif_wake_queue(dev);
}
+out:
+ if (entry)
+ lec_arp_put(entry);
dev->trans_start = jiffies;
- return 0;
+ return 0;
}
/* The inverse routine to net_open(). */
-static int
-lec_close(struct net_device *dev)
+static int lec_close(struct net_device *dev)
{
- netif_stop_queue(dev);
- return 0;
+ netif_stop_queue(dev);
+ return 0;
}
/*
* Get the current statistics.
* This may be called with the card open or closed.
*/
-static struct net_device_stats *
-lec_get_stats(struct net_device *dev)
+static struct net_device_stats *lec_get_stats(struct net_device *dev)
{
- return &((struct lec_priv *)dev->priv)->stats;
+ return &((struct lec_priv *)dev->priv)->stats;
}
-static int
-lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
+static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
{
unsigned long flags;
- struct net_device *dev = (struct net_device*)vcc->proto_data;
- struct lec_priv *priv = (struct lec_priv*)dev->priv;
- struct atmlec_msg *mesg;
- struct lec_arp_table *entry;
- int i;
- char *tmp; /* FIXME */
+ struct net_device *dev = (struct net_device *)vcc->proto_data;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
+ struct atmlec_msg *mesg;
+ struct lec_arp_table *entry;
+ int i;
+ char *tmp; /* FIXME */
atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
- mesg = (struct atmlec_msg *)skb->data;
- tmp = skb->data;
- tmp += sizeof(struct atmlec_msg);
- DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type);
- switch(mesg->type) {
- case l_set_mac_addr:
- for (i=0;i<6;i++) {
- dev->dev_addr[i] = mesg->content.normal.mac_addr[i];
- }
- break;
- case l_del_mac_addr:
- for(i=0;i<6;i++) {
- dev->dev_addr[i] = 0;
- }
- break;
- case l_addr_delete:
- lec_addr_delete(priv, mesg->content.normal.atm_addr,
- mesg->content.normal.flag);
- break;
- case l_topology_change:
- priv->topology_change = mesg->content.normal.flag;
- break;
- case l_flush_complete:
- lec_flush_complete(priv, mesg->content.normal.flag);
- break;
- case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */
+ mesg = (struct atmlec_msg *)skb->data;
+ tmp = skb->data;
+ tmp += sizeof(struct atmlec_msg);
+ DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type);
+ switch (mesg->type) {
+ case l_set_mac_addr:
+ for (i = 0; i < 6; i++) {
+ dev->dev_addr[i] = mesg->content.normal.mac_addr[i];
+ }
+ break;
+ case l_del_mac_addr:
+ for (i = 0; i < 6; i++) {
+ dev->dev_addr[i] = 0;
+ }
+ break;
+ case l_addr_delete:
+ lec_addr_delete(priv, mesg->content.normal.atm_addr,
+ mesg->content.normal.flag);
+ break;
+ case l_topology_change:
+ priv->topology_change = mesg->content.normal.flag;
+ break;
+ case l_flush_complete:
+ lec_flush_complete(priv, mesg->content.normal.flag);
+ break;
+ case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- entry = lec_arp_find(priv, mesg->content.normal.mac_addr);
- lec_arp_remove(priv, entry);
+ entry = lec_arp_find(priv, mesg->content.normal.mac_addr);
+ lec_arp_remove(priv, entry);
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- if (mesg->content.normal.no_source_le_narp)
- break;
- /* FALL THROUGH */
- case l_arp_update:
- lec_arp_update(priv, mesg->content.normal.mac_addr,
- mesg->content.normal.atm_addr,
- mesg->content.normal.flag,
- mesg->content.normal.targetless_le_arp);
- DPRINTK("lec: in l_arp_update\n");
- if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */
- DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs);
- lane2_associate_ind(dev,
- mesg->content.normal.mac_addr,
- tmp, mesg->sizeoftlvs);
- }
- break;
- case l_config:
- priv->maximum_unknown_frame_count =
- mesg->content.config.maximum_unknown_frame_count;
- priv->max_unknown_frame_time =
- (mesg->content.config.max_unknown_frame_time*HZ);
- priv->max_retry_count =
- mesg->content.config.max_retry_count;
- priv->aging_time = (mesg->content.config.aging_time*HZ);
- priv->forward_delay_time =
- (mesg->content.config.forward_delay_time*HZ);
- priv->arp_response_time =
- (mesg->content.config.arp_response_time*HZ);
- priv->flush_timeout = (mesg->content.config.flush_timeout*HZ);
- priv->path_switching_delay =
- (mesg->content.config.path_switching_delay*HZ);
- priv->lane_version = mesg->content.config.lane_version; /* LANE2 */
+ if (mesg->content.normal.no_source_le_narp)
+ break;
+ /* FALL THROUGH */
+ case l_arp_update:
+ lec_arp_update(priv, mesg->content.normal.mac_addr,
+ mesg->content.normal.atm_addr,
+ mesg->content.normal.flag,
+ mesg->content.normal.targetless_le_arp);
+ DPRINTK("lec: in l_arp_update\n");
+ if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */
+ DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n",
+ mesg->sizeoftlvs);
+ lane2_associate_ind(dev, mesg->content.normal.mac_addr,
+ tmp, mesg->sizeoftlvs);
+ }
+ break;
+ case l_config:
+ priv->maximum_unknown_frame_count =
+ mesg->content.config.maximum_unknown_frame_count;
+ priv->max_unknown_frame_time =
+ (mesg->content.config.max_unknown_frame_time * HZ);
+ priv->max_retry_count = mesg->content.config.max_retry_count;
+ priv->aging_time = (mesg->content.config.aging_time * HZ);
+ priv->forward_delay_time =
+ (mesg->content.config.forward_delay_time * HZ);
+ priv->arp_response_time =
+ (mesg->content.config.arp_response_time * HZ);
+ priv->flush_timeout = (mesg->content.config.flush_timeout * HZ);
+ priv->path_switching_delay =
+ (mesg->content.config.path_switching_delay * HZ);
+ priv->lane_version = mesg->content.config.lane_version; /* LANE2 */
priv->lane2_ops = NULL;
if (priv->lane_version > 1)
priv->lane2_ops = &lane2_ops;
if (dev->change_mtu(dev, mesg->content.config.mtu))
printk("%s: change_mtu to %d failed\n", dev->name,
- mesg->content.config.mtu);
+ mesg->content.config.mtu);
priv->is_proxy = mesg->content.config.is_proxy;
- break;
- case l_flush_tran_id:
- lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr,
- mesg->content.normal.flag);
- break;
- case l_set_lecid:
- priv->lecid=(unsigned short)(0xffff&mesg->content.normal.flag);
- break;
- case l_should_bridge: {
+ break;
+ case l_flush_tran_id:
+ lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr,
+ mesg->content.normal.flag);
+ break;
+ case l_set_lecid:
+ priv->lecid =
+ (unsigned short)(0xffff & mesg->content.normal.flag);
+ break;
+ case l_should_bridge:
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
- struct net_bridge_fdb_entry *f;
-
- DPRINTK("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
- dev->name,
- mesg->content.proxy.mac_addr[0], mesg->content.proxy.mac_addr[1],
- mesg->content.proxy.mac_addr[2], mesg->content.proxy.mac_addr[3],
- mesg->content.proxy.mac_addr[4], mesg->content.proxy.mac_addr[5]);
-
- if (br_fdb_get_hook == NULL || dev->br_port == NULL)
- break;
-
- f = br_fdb_get_hook(dev->br_port->br, mesg->content.proxy.mac_addr);
- if (f != NULL &&
- f->dst->dev != dev &&
- f->dst->state == BR_STATE_FORWARDING) {
- /* hit from bridge table, send LE_ARP_RESPONSE */
- struct sk_buff *skb2;
- struct sock *sk;
-
- DPRINTK("%s: entry found, responding to zeppelin\n", dev->name);
- skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
- if (skb2 == NULL) {
- br_fdb_put_hook(f);
- break;
- }
- skb2->len = sizeof(struct atmlec_msg);
- memcpy(skb2->data, mesg, sizeof(struct atmlec_msg));
- atm_force_charge(priv->lecd, skb2->truesize);
- sk = sk_atm(priv->lecd);
- skb_queue_tail(&sk->sk_receive_queue, skb2);
- sk->sk_data_ready(sk, skb2->len);
- }
- if (f != NULL) br_fdb_put_hook(f);
+ {
+ struct net_bridge_fdb_entry *f;
+
+ DPRINTK
+ ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+ dev->name, mesg->content.proxy.mac_addr[0],
+ mesg->content.proxy.mac_addr[1],
+ mesg->content.proxy.mac_addr[2],
+ mesg->content.proxy.mac_addr[3],
+ mesg->content.proxy.mac_addr[4],
+ mesg->content.proxy.mac_addr[5]);
+
+ if (br_fdb_get_hook == NULL || dev->br_port == NULL)
+ break;
+
+ f = br_fdb_get_hook(dev->br_port->br,
+ mesg->content.proxy.mac_addr);
+ if (f != NULL && f->dst->dev != dev
+ && f->dst->state == BR_STATE_FORWARDING) {
+ /* hit from bridge table, send LE_ARP_RESPONSE */
+ struct sk_buff *skb2;
+ struct sock *sk;
+
+ DPRINTK
+ ("%s: entry found, responding to zeppelin\n",
+ dev->name);
+ skb2 =
+ alloc_skb(sizeof(struct atmlec_msg),
+ GFP_ATOMIC);
+ if (skb2 == NULL) {
+ br_fdb_put_hook(f);
+ break;
+ }
+ skb2->len = sizeof(struct atmlec_msg);
+ memcpy(skb2->data, mesg,
+ sizeof(struct atmlec_msg));
+ atm_force_charge(priv->lecd, skb2->truesize);
+ sk = sk_atm(priv->lecd);
+ skb_queue_tail(&sk->sk_receive_queue, skb2);
+ sk->sk_data_ready(sk, skb2->len);
+ }
+ if (f != NULL)
+ br_fdb_put_hook(f);
+ }
#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
- }
- break;
- default:
- printk("%s: Unknown message type %d\n", dev->name, mesg->type);
- dev_kfree_skb(skb);
- return -EINVAL;
- }
- dev_kfree_skb(skb);
- return 0;
+ break;
+ default:
+ printk("%s: Unknown message type %d\n", dev->name, mesg->type);
+ dev_kfree_skb(skb);
+ return -EINVAL;
+ }
+ dev_kfree_skb(skb);
+ return 0;
}
-static void
-lec_atm_close(struct atm_vcc *vcc)
+static void lec_atm_close(struct atm_vcc *vcc)
{
- struct sk_buff *skb;
- struct net_device *dev = (struct net_device *)vcc->proto_data;
- struct lec_priv *priv = (struct lec_priv *)dev->priv;
+ struct sk_buff *skb;
+ struct net_device *dev = (struct net_device *)vcc->proto_data;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
- priv->lecd = NULL;
- /* Do something needful? */
+ priv->lecd = NULL;
+ /* Do something needful? */
- netif_stop_queue(dev);
- lec_arp_destroy(priv);
+ netif_stop_queue(dev);
+ lec_arp_destroy(priv);
- if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
+ if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
printk("%s lec_atm_close: closing with messages pending\n",
- dev->name);
- while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) {
- atm_return(vcc, skb->truesize);
+ dev->name);
+ while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) {
+ atm_return(vcc, skb->truesize);
dev_kfree_skb(skb);
- }
-
+ }
+
printk("%s: Shut down!\n", dev->name);
- module_put(THIS_MODULE);
+ module_put(THIS_MODULE);
}
static struct atmdev_ops lecdev_ops = {
- .close = lec_atm_close,
- .send = lec_atm_send
+ .close = lec_atm_close,
+ .send = lec_atm_send
};
static struct atm_dev lecatm_dev = {
- .ops = &lecdev_ops,
- .type = "lec",
- .number = 999, /* dummy device number */
- .lock = SPIN_LOCK_UNLOCKED
+ .ops = &lecdev_ops,
+ .type = "lec",
+ .number = 999, /* dummy device number */
+ .lock = SPIN_LOCK_UNLOCKED
};
/*
* LANE2: new argument struct sk_buff *data contains
* the LE_ARP based TLVs introduced in the LANE2 spec
*/
-static int
-send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
- unsigned char *mac_addr, unsigned char *atm_addr,
- struct sk_buff *data)
+static int
+send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
+ unsigned char *mac_addr, unsigned char *atm_addr,
+ struct sk_buff *data)
{
struct sock *sk;
struct sk_buff *skb;
@@ -622,187 +654,193 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
return -1;
skb->len = sizeof(struct atmlec_msg);
mesg = (struct atmlec_msg *)skb->data;
- memset(mesg, 0, sizeof(struct atmlec_msg));
+ memset(mesg, 0, sizeof(struct atmlec_msg));
mesg->type = type;
- if (data != NULL)
- mesg->sizeoftlvs = data->len;
+ if (data != NULL)
+ mesg->sizeoftlvs = data->len;
if (mac_addr)
memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN);
- else
- mesg->content.normal.targetless_le_arp = 1;
+ else
+ mesg->content.normal.targetless_le_arp = 1;
if (atm_addr)
memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN);
- atm_force_charge(priv->lecd, skb->truesize);
+ atm_force_charge(priv->lecd, skb->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk, skb->len);
- if (data != NULL) {
- DPRINTK("lec: about to send %d bytes of data\n", data->len);
- atm_force_charge(priv->lecd, data->truesize);
- skb_queue_tail(&sk->sk_receive_queue, data);
- sk->sk_data_ready(sk, skb->len);
- }
+ if (data != NULL) {
+ DPRINTK("lec: about to send %d bytes of data\n", data->len);
+ atm_force_charge(priv->lecd, data->truesize);
+ skb_queue_tail(&sk->sk_receive_queue, data);
+ sk->sk_data_ready(sk, skb->len);
+ }
- return 0;
+ return 0;
}
/* shamelessly stolen from drivers/net/net_init.c */
static int lec_change_mtu(struct net_device *dev, int new_mtu)
{
- if ((new_mtu < 68) || (new_mtu > 18190))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
+ if ((new_mtu < 68) || (new_mtu > 18190))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
}
static void lec_set_multicast_list(struct net_device *dev)
{
- /* by default, all multicast frames arrive over the bus.
- * eventually support selective multicast service
- */
- return;
+ /*
+ * by default, all multicast frames arrive over the bus.
+ * eventually support selective multicast service
+ */
+ return;
}
-static void
-lec_init(struct net_device *dev)
+static void lec_init(struct net_device *dev)
{
- dev->change_mtu = lec_change_mtu;
- dev->open = lec_open;
- dev->stop = lec_close;
- dev->hard_start_xmit = lec_start_xmit;
+ dev->change_mtu = lec_change_mtu;
+ dev->open = lec_open;
+ dev->stop = lec_close;
+ dev->hard_start_xmit = lec_start_xmit;
dev->tx_timeout = lec_tx_timeout;
- dev->get_stats = lec_get_stats;
- dev->set_multicast_list = lec_set_multicast_list;
- dev->do_ioctl = NULL;
- printk("%s: Initialized!\n",dev->name);
- return;
+ dev->get_stats = lec_get_stats;
+ dev->set_multicast_list = lec_set_multicast_list;
+ dev->do_ioctl = NULL;
+ printk("%s: Initialized!\n", dev->name);
+ return;
}
static unsigned char lec_ctrl_magic[] = {
- 0xff,
- 0x00,
- 0x01,
- 0x01 };
+ 0xff,
+ 0x00,
+ 0x01,
+ 0x01
+};
#define LEC_DATA_DIRECT_8023 2
#define LEC_DATA_DIRECT_8025 3
static int lec_is_data_direct(struct atm_vcc *vcc)
-{
+{
return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) ||
(vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025));
-}
+}
-static void
-lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
+static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
{
unsigned long flags;
- struct net_device *dev = (struct net_device *)vcc->proto_data;
- struct lec_priv *priv = (struct lec_priv *)dev->priv;
+ struct net_device *dev = (struct net_device *)vcc->proto_data;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
#if DUMP_PACKETS >0
- int i=0;
- char buf[300];
+ int i = 0;
+ char buf[300];
- printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name,
- vcc->vpi, vcc->vci);
+ printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name,
+ vcc->vpi, vcc->vci);
#endif
- if (!skb) {
- DPRINTK("%s: null skb\n",dev->name);
- lec_vcc_close(priv, vcc);
- return;
- }
+ if (!skb) {
+ DPRINTK("%s: null skb\n", dev->name);
+ lec_vcc_close(priv, vcc);
+ return;
+ }
#if DUMP_PACKETS > 0
- printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name,
- skb->len, priv->lecid);
+ printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name,
+ skb->len, priv->lecid);
#if DUMP_PACKETS >= 2
- for(i=0;i<skb->len && i <99;i++) {
- sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]);
- }
+ for (i = 0; i < skb->len && i < 99; i++) {
+ sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]);
+ }
#elif DUMP_PACKETS >= 1
- for(i=0;i<skb->len && i < 30;i++) {
- sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]);
- }
+ for (i = 0; i < skb->len && i < 30; i++) {
+ sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]);
+ }
#endif /* DUMP_PACKETS >= 1 */
- if (i==skb->len)
- printk("%s\n",buf);
- else
- printk("%s...\n",buf);
+ if (i == skb->len)
+ printk("%s\n", buf);
+ else
+ printk("%s...\n", buf);
#endif /* DUMP_PACKETS > 0 */
- if (memcmp(skb->data, lec_ctrl_magic, 4) ==0) { /* Control frame, to daemon*/
+ if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */
struct sock *sk = sk_atm(vcc);
- DPRINTK("%s: To daemon\n",dev->name);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
- } else { /* Data frame, queue to protocol handlers */
+ DPRINTK("%s: To daemon\n", dev->name);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
+ } else { /* Data frame, queue to protocol handlers */
struct lec_arp_table *entry;
- unsigned char *src, *dst;
-
- atm_return(vcc,skb->truesize);
- if (*(uint16_t *)skb->data == htons(priv->lecid) ||
- !priv->lecd ||
- !(dev->flags & IFF_UP)) {
- /* Probably looping back, or if lecd is missing,
- lecd has gone down */
- DPRINTK("Ignoring frame...\n");
- dev_kfree_skb(skb);
- return;
- }
+ unsigned char *src, *dst;
+
+ atm_return(vcc, skb->truesize);
+ if (*(uint16_t *) skb->data == htons(priv->lecid) ||
+ !priv->lecd || !(dev->flags & IFF_UP)) {
+ /*
+ * Probably looping back, or if lecd is missing,
+ * lecd has gone down
+ */
+ DPRINTK("Ignoring frame...\n");
+ dev_kfree_skb(skb);
+ return;
+ }
#ifdef CONFIG_TR
- if (priv->is_trdev)
- dst = ((struct lecdatahdr_8025 *) skb->data)->h_dest;
- else
+ if (priv->is_trdev)
+ dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest;
+ else
#endif
- dst = ((struct lecdatahdr_8023 *) skb->data)->h_dest;
+ dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest;
- /* If this is a Data Direct VCC, and the VCC does not match
+ /*
+ * If this is a Data Direct VCC, and the VCC does not match
* the LE_ARP cache entry, delete the LE_ARP cache entry.
*/
spin_lock_irqsave(&priv->lec_arp_lock, flags);
if (lec_is_data_direct(vcc)) {
#ifdef CONFIG_TR
if (priv->is_trdev)
- src = ((struct lecdatahdr_8025 *) skb->data)->h_source;
+ src =
+ ((struct lecdatahdr_8025 *)skb->data)->
+ h_source;
else
#endif
- src = ((struct lecdatahdr_8023 *) skb->data)->h_source;
+ src =
+ ((struct lecdatahdr_8023 *)skb->data)->
+ h_source;
entry = lec_arp_find(priv, src);
if (entry && entry->vcc != vcc) {
lec_arp_remove(priv, entry);
- kfree(entry);
+ lec_arp_put(entry);
}
}
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */
- !priv->is_proxy && /* Proxy wants all the packets */
+ if (!(dst[0] & 0x01) && /* Never filter Multi/Broadcast */
+ !priv->is_proxy && /* Proxy wants all the packets */
memcmp(dst, dev->dev_addr, dev->addr_len)) {
- dev_kfree_skb(skb);
- return;
- }
- if (priv->lec_arp_empty_ones) {
- lec_arp_check_empties(priv, vcc, skb);
- }
- skb->dev = dev;
- skb_pull(skb, 2); /* skip lec_id */
+ dev_kfree_skb(skb);
+ return;
+ }
+ if (!hlist_empty(&priv->lec_arp_empty_ones)) {
+ lec_arp_check_empties(priv, vcc, skb);
+ }
+ skb->dev = dev;
+ skb_pull(skb, 2); /* skip lec_id */
#ifdef CONFIG_TR
- if (priv->is_trdev) skb->protocol = tr_type_trans(skb, dev);
- else
+ if (priv->is_trdev)
+ skb->protocol = tr_type_trans(skb, dev);
+ else
#endif
- skb->protocol = eth_type_trans(skb, dev);
- priv->stats.rx_packets++;
- priv->stats.rx_bytes += skb->len;
- memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
- netif_rx(skb);
- }
+ skb->protocol = eth_type_trans(skb, dev);
+ priv->stats.rx_packets++;
+ priv->stats.rx_bytes += skb->len;
+ memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
+ netif_rx(skb);
+ }
}
-static void
-lec_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+static void lec_pop(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
struct net_device *dev = skb->dev;
@@ -821,123 +859,121 @@ lec_pop(struct atm_vcc *vcc, struct sk_buff *skb)
}
}
-static int
-lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
+static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
{
struct lec_vcc_priv *vpriv;
- int bytes_left;
- struct atmlec_ioc ioc_data;
-
- /* Lecd must be up in this case */
- bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
- if (bytes_left != 0) {
- printk("lec: lec_vcc_attach, copy from user failed for %d bytes\n",
- bytes_left);
- }
- if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF ||
- !dev_lec[ioc_data.dev_num])
- return -EINVAL;
+ int bytes_left;
+ struct atmlec_ioc ioc_data;
+
+ /* Lecd must be up in this case */
+ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
+ if (bytes_left != 0) {
+ printk
+ ("lec: lec_vcc_attach, copy from user failed for %d bytes\n",
+ bytes_left);
+ }
+ if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF ||
+ !dev_lec[ioc_data.dev_num])
+ return -EINVAL;
if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL)))
return -ENOMEM;
vpriv->xoff = 0;
vpriv->old_pop = vcc->pop;
vcc->user_back = vpriv;
vcc->pop = lec_pop;
- lec_vcc_added(dev_lec[ioc_data.dev_num]->priv,
- &ioc_data, vcc, vcc->push);
- vcc->proto_data = dev_lec[ioc_data.dev_num];
- vcc->push = lec_push;
- return 0;
+ lec_vcc_added(dev_lec[ioc_data.dev_num]->priv,
+ &ioc_data, vcc, vcc->push);
+ vcc->proto_data = dev_lec[ioc_data.dev_num];
+ vcc->push = lec_push;
+ return 0;
}
-static int
-lec_mcast_attach(struct atm_vcc *vcc, int arg)
+static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
- if (arg <0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
- return -EINVAL;
- vcc->proto_data = dev_lec[arg];
- return (lec_mcast_make((struct lec_priv*)dev_lec[arg]->priv, vcc));
+ if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+ return -EINVAL;
+ vcc->proto_data = dev_lec[arg];
+ return (lec_mcast_make((struct lec_priv *)dev_lec[arg]->priv, vcc));
}
/* Initialize device. */
-static int
-lecd_attach(struct atm_vcc *vcc, int arg)
-{
- int i;
- struct lec_priv *priv;
-
- if (arg<0)
- i = 0;
- else
- i = arg;
+static int lecd_attach(struct atm_vcc *vcc, int arg)
+{
+ int i;
+ struct lec_priv *priv;
+
+ if (arg < 0)
+ i = 0;
+ else
+ i = arg;
#ifdef CONFIG_TR
- if (arg >= MAX_LEC_ITF)
- return -EINVAL;
-#else /* Reserve the top NUM_TR_DEVS for TR */
- if (arg >= (MAX_LEC_ITF-NUM_TR_DEVS))
- return -EINVAL;
+ if (arg >= MAX_LEC_ITF)
+ return -EINVAL;
+#else /* Reserve the top NUM_TR_DEVS for TR */
+ if (arg >= (MAX_LEC_ITF - NUM_TR_DEVS))
+ return -EINVAL;
#endif
- if (!dev_lec[i]) {
- int is_trdev, size;
+ if (!dev_lec[i]) {
+ int is_trdev, size;
- is_trdev = 0;
- if (i >= (MAX_LEC_ITF - NUM_TR_DEVS))
- is_trdev = 1;
+ is_trdev = 0;
+ if (i >= (MAX_LEC_ITF - NUM_TR_DEVS))
+ is_trdev = 1;
- size = sizeof(struct lec_priv);
+ size = sizeof(struct lec_priv);
#ifdef CONFIG_TR
- if (is_trdev)
- dev_lec[i] = alloc_trdev(size);
- else
+ if (is_trdev)
+ dev_lec[i] = alloc_trdev(size);
+ else
#endif
- dev_lec[i] = alloc_etherdev(size);
- if (!dev_lec[i])
- return -ENOMEM;
- snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
- if (register_netdev(dev_lec[i])) {
- free_netdev(dev_lec[i]);
- return -EINVAL;
- }
-
- priv = dev_lec[i]->priv;
- priv->is_trdev = is_trdev;
- lec_init(dev_lec[i]);
- } else {
- priv = dev_lec[i]->priv;
- if (priv->lecd)
- return -EADDRINUSE;
- }
- lec_arp_init(priv);
- priv->itfnum = i; /* LANE2 addition */
- priv->lecd = vcc;
- vcc->dev = &lecatm_dev;
- vcc_insert_socket(sk_atm(vcc));
-
- vcc->proto_data = dev_lec[i];
- set_bit(ATM_VF_META,&vcc->flags);
- set_bit(ATM_VF_READY,&vcc->flags);
-
- /* Set default values to these variables */
- priv->maximum_unknown_frame_count = 1;
- priv->max_unknown_frame_time = (1*HZ);
- priv->vcc_timeout_period = (1200*HZ);
- priv->max_retry_count = 1;
- priv->aging_time = (300*HZ);
- priv->forward_delay_time = (15*HZ);
- priv->topology_change = 0;
- priv->arp_response_time = (1*HZ);
- priv->flush_timeout = (4*HZ);
- priv->path_switching_delay = (6*HZ);
-
- if (dev_lec[i]->flags & IFF_UP) {
- netif_start_queue(dev_lec[i]);
- }
- __module_get(THIS_MODULE);
- return i;
+ dev_lec[i] = alloc_etherdev(size);
+ if (!dev_lec[i])
+ return -ENOMEM;
+ snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
+ if (register_netdev(dev_lec[i])) {
+ free_netdev(dev_lec[i]);
+ return -EINVAL;
+ }
+
+ priv = dev_lec[i]->priv;
+ priv->is_trdev = is_trdev;
+ lec_init(dev_lec[i]);
+ } else {
+ priv = dev_lec[i]->priv;
+ if (priv->lecd)
+ return -EADDRINUSE;
+ }
+ lec_arp_init(priv);
+ priv->itfnum = i; /* LANE2 addition */
+ priv->lecd = vcc;
+ vcc->dev = &lecatm_dev;
+ vcc_insert_socket(sk_atm(vcc));
+
+ vcc->proto_data = dev_lec[i];
+ set_bit(ATM_VF_META, &vcc->flags);
+ set_bit(ATM_VF_READY, &vcc->flags);
+
+ /* Set default values to these variables */
+ priv->maximum_unknown_frame_count = 1;
+ priv->max_unknown_frame_time = (1 * HZ);
+ priv->vcc_timeout_period = (1200 * HZ);
+ priv->max_retry_count = 1;
+ priv->aging_time = (300 * HZ);
+ priv->forward_delay_time = (15 * HZ);
+ priv->topology_change = 0;
+ priv->arp_response_time = (1 * HZ);
+ priv->flush_timeout = (4 * HZ);
+ priv->path_switching_delay = (6 * HZ);
+
+ if (dev_lec[i]->flags & IFF_UP) {
+ netif_start_queue(dev_lec[i]);
+ }
+ __module_get(THIS_MODULE);
+ return i;
}
#ifdef CONFIG_PROC_FS
-static char* lec_arp_get_status_string(unsigned char status)
+static char *lec_arp_get_status_string(unsigned char status)
{
static char *lec_arp_status_string[] = {
"ESI_UNKNOWN ",
@@ -967,52 +1003,54 @@ static void lec_info(struct seq_file *seq, struct lec_arp_table *entry)
if (entry->vcc)
seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci);
else
- seq_printf(seq, " ");
+ seq_printf(seq, " ");
if (entry->recv_vcc) {
seq_printf(seq, " %3d %3d", entry->recv_vcc->vpi,
entry->recv_vcc->vci);
- }
- seq_putc(seq, '\n');
+ }
+ seq_putc(seq, '\n');
}
-
struct lec_state {
unsigned long flags;
struct lec_priv *locked;
- struct lec_arp_table *entry;
+ struct hlist_node *node;
struct net_device *dev;
int itf;
int arp_table;
int misc_table;
};
-static void *lec_tbl_walk(struct lec_state *state, struct lec_arp_table *tbl,
+static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
loff_t *l)
{
- struct lec_arp_table *e = state->entry;
+ struct hlist_node *e = state->node;
+ struct lec_arp_table *tmp;
if (!e)
- e = tbl;
+ e = tbl->first;
if (e == (void *)1) {
- e = tbl;
+ e = tbl->first;
--*l;
}
- for (; e; e = e->next) {
+
+ hlist_for_each_entry_from(tmp, e, next) {
if (--*l < 0)
break;
}
- state->entry = e;
+ state->node = e;
+
return (*l < 0) ? state : NULL;
}
static void *lec_arp_walk(struct lec_state *state, loff_t *l,
- struct lec_priv *priv)
+ struct lec_priv *priv)
{
void *v = NULL;
int p;
for (p = state->arp_table; p < LEC_ARP_TABLE_SIZE; p++) {
- v = lec_tbl_walk(state, priv->lec_arp_tables[p], l);
+ v = lec_tbl_walk(state, &priv->lec_arp_tables[p], l);
if (v)
break;
}
@@ -1023,10 +1061,10 @@ static void *lec_arp_walk(struct lec_state *state, loff_t *l,
static void *lec_misc_walk(struct lec_state *state, loff_t *l,
struct lec_priv *priv)
{
- struct lec_arp_table *lec_misc_tables[] = {
- priv->lec_arp_empty_ones,
- priv->lec_no_forward,
- priv->mcast_fwds
+ struct hlist_head *lec_misc_tables[] = {
+ &priv->lec_arp_empty_ones,
+ &priv->lec_no_forward,
+ &priv->mcast_fwds
};
void *v = NULL;
int q;
@@ -1047,8 +1085,7 @@ static void *lec_priv_walk(struct lec_state *state, loff_t *l,
state->locked = priv;
spin_lock_irqsave(&priv->lec_arp_lock, state->flags);
}
- if (!lec_arp_walk(state, l, priv) &&
- !lec_misc_walk(state, l, priv)) {
+ if (!lec_arp_walk(state, l, priv) && !lec_misc_walk(state, l, priv)) {
spin_unlock_irqrestore(&priv->lec_arp_lock, state->flags);
state->locked = NULL;
/* Partial state reset for the next time we get called */
@@ -1082,7 +1119,7 @@ static void *lec_get_idx(struct lec_state *state, loff_t l)
if (v)
break;
}
- return v;
+ return v;
}
static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1094,9 +1131,9 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
state->locked = NULL;
state->arp_table = 0;
state->misc_table = 0;
- state->entry = (void *)1;
+ state->node = (void *)1;
- return *pos ? lec_get_idx(state, *pos) : (void*)1;
+ return *pos ? lec_get_idx(state, *pos) : (void *)1;
}
static void lec_seq_stop(struct seq_file *seq, void *v)
@@ -1121,27 +1158,28 @@ static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int lec_seq_show(struct seq_file *seq, void *v)
{
- static char lec_banner[] = "Itf MAC ATM destination"
- " Status Flags "
- "VPI/VCI Recv VPI/VCI\n";
+ static char lec_banner[] = "Itf MAC ATM destination"
+ " Status Flags "
+ "VPI/VCI Recv VPI/VCI\n";
if (v == (void *)1)
seq_puts(seq, lec_banner);
else {
struct lec_state *state = seq->private;
- struct net_device *dev = state->dev;
+ struct net_device *dev = state->dev;
+ struct lec_arp_table *entry = hlist_entry(state->node, struct lec_arp_table, next);
seq_printf(seq, "%s ", dev->name);
- lec_info(seq, state->entry);
+ lec_info(seq, entry);
}
return 0;
}
static struct seq_operations lec_seq_ops = {
- .start = lec_seq_start,
- .next = lec_seq_next,
- .stop = lec_seq_stop,
- .show = lec_seq_show,
+ .start = lec_seq_start,
+ .next = lec_seq_next,
+ .stop = lec_seq_stop,
+ .show = lec_seq_show,
};
static int lec_seq_open(struct inode *inode, struct file *file)
@@ -1175,11 +1213,11 @@ static int lec_seq_release(struct inode *inode, struct file *file)
}
static struct file_operations lec_seq_fops = {
- .owner = THIS_MODULE,
- .open = lec_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = lec_seq_release,
+ .owner = THIS_MODULE,
+ .open = lec_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = lec_seq_release,
};
#endif
@@ -1187,38 +1225,38 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct atm_vcc *vcc = ATM_SD(sock);
int err = 0;
-
+
switch (cmd) {
- case ATMLEC_CTRL:
- case ATMLEC_MCAST:
- case ATMLEC_DATA:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- break;
- default:
- return -ENOIOCTLCMD;
+ case ATMLEC_CTRL:
+ case ATMLEC_MCAST:
+ case ATMLEC_DATA:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ break;
+ default:
+ return -ENOIOCTLCMD;
}
switch (cmd) {
- case ATMLEC_CTRL:
- err = lecd_attach(vcc, (int) arg);
- if (err >= 0)
- sock->state = SS_CONNECTED;
- break;
- case ATMLEC_MCAST:
- err = lec_mcast_attach(vcc, (int) arg);
- break;
- case ATMLEC_DATA:
- err = lec_vcc_attach(vcc, (void __user *) arg);
- break;
+ case ATMLEC_CTRL:
+ err = lecd_attach(vcc, (int)arg);
+ if (err >= 0)
+ sock->state = SS_CONNECTED;
+ break;
+ case ATMLEC_MCAST:
+ err = lec_mcast_attach(vcc, (int)arg);
+ break;
+ case ATMLEC_DATA:
+ err = lec_vcc_attach(vcc, (void __user *)arg);
+ break;
}
return err;
}
static struct atm_ioctl lane_ioctl_ops = {
- .owner = THIS_MODULE,
- .ioctl = lane_ioctl,
+ .owner = THIS_MODULE,
+ .ioctl = lane_ioctl,
};
static int __init lane_module_init(void)
@@ -1232,29 +1270,29 @@ static int __init lane_module_init(void)
#endif
register_atm_ioctl(&lane_ioctl_ops);
- printk("lec.c: " __DATE__ " " __TIME__ " initialized\n");
- return 0;
+ printk("lec.c: " __DATE__ " " __TIME__ " initialized\n");
+ return 0;
}
static void __exit lane_module_cleanup(void)
{
- int i;
- struct lec_priv *priv;
+ int i;
+ struct lec_priv *priv;
remove_proc_entry("lec", atm_proc_root);
deregister_atm_ioctl(&lane_ioctl_ops);
- for (i = 0; i < MAX_LEC_ITF; i++) {
- if (dev_lec[i] != NULL) {
- priv = (struct lec_priv *)dev_lec[i]->priv;
+ for (i = 0; i < MAX_LEC_ITF; i++) {
+ if (dev_lec[i] != NULL) {
+ priv = (struct lec_priv *)dev_lec[i]->priv;
unregister_netdev(dev_lec[i]);
- free_netdev(dev_lec[i]);
- dev_lec[i] = NULL;
- }
- }
+ free_netdev(dev_lec[i]);
+ dev_lec[i] = NULL;
+ }
+ }
- return;
+ return;
}
module_init(lane_module_init);
@@ -1268,34 +1306,34 @@ module_exit(lane_module_cleanup);
* If dst_mac == NULL, targetless LE_ARP will be sent
*/
static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
- u8 **tlvs, u32 *sizeoftlvs)
+ u8 **tlvs, u32 *sizeoftlvs)
{
unsigned long flags;
- struct lec_priv *priv = (struct lec_priv *)dev->priv;
- struct lec_arp_table *table;
- struct sk_buff *skb;
- int retval;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
+ struct lec_arp_table *table;
+ struct sk_buff *skb;
+ int retval;
- if (force == 0) {
+ if (force == 0) {
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- table = lec_arp_find(priv, dst_mac);
+ table = lec_arp_find(priv, dst_mac);
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- if(table == NULL)
- return -1;
-
- *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC);
- if (*tlvs == NULL)
- return -1;
-
- memcpy(*tlvs, table->tlvs, table->sizeoftlvs);
- *sizeoftlvs = table->sizeoftlvs;
-
- return 0;
- }
+ if (table == NULL)
+ return -1;
+
+ *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC);
+ if (*tlvs == NULL)
+ return -1;
+
+ memcpy(*tlvs, table->tlvs, table->sizeoftlvs);
+ *sizeoftlvs = table->sizeoftlvs;
+
+ return 0;
+ }
if (sizeoftlvs == NULL)
retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, NULL);
-
+
else {
skb = alloc_skb(*sizeoftlvs, GFP_ATOMIC);
if (skb == NULL)
@@ -1304,9 +1342,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
memcpy(skb->data, *tlvs, *sizeoftlvs);
retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
}
- return retval;
-}
-
+ return retval;
+}
/*
* LANE2: 3.1.4, LE_ASSOCIATE.request
@@ -1315,80 +1352,85 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
* Returns 1 for success, 0 for failure (out of memory)
*
*/
-static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs)
+static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
+ u8 *tlvs, u32 sizeoftlvs)
{
- int retval;
- struct sk_buff *skb;
- struct lec_priv *priv = (struct lec_priv*)dev->priv;
-
- if (compare_ether_addr(lan_dst, dev->dev_addr))
- return (0); /* not our mac address */
-
- kfree(priv->tlvs); /* NULL if there was no previous association */
-
- priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
- if (priv->tlvs == NULL)
- return (0);
- priv->sizeoftlvs = sizeoftlvs;
- memcpy(priv->tlvs, tlvs, sizeoftlvs);
-
- skb = alloc_skb(sizeoftlvs, GFP_ATOMIC);
- if (skb == NULL)
- return 0;
- skb->len = sizeoftlvs;
- memcpy(skb->data, tlvs, sizeoftlvs);
- retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
- if (retval != 0)
- printk("lec.c: lane2_associate_req() failed\n");
- /* If the previous association has changed we must
- * somehow notify other LANE entities about the change
- */
- return (1);
+ int retval;
+ struct sk_buff *skb;
+ struct lec_priv *priv = (struct lec_priv *)dev->priv;
+
+ if (compare_ether_addr(lan_dst, dev->dev_addr))
+ return (0); /* not our mac address */
+
+ kfree(priv->tlvs); /* NULL if there was no previous association */
+
+ priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+ if (priv->tlvs == NULL)
+ return (0);
+ priv->sizeoftlvs = sizeoftlvs;
+ memcpy(priv->tlvs, tlvs, sizeoftlvs);
+
+ skb = alloc_skb(sizeoftlvs, GFP_ATOMIC);
+ if (skb == NULL)
+ return 0;
+ skb->len = sizeoftlvs;
+ memcpy(skb->data, tlvs, sizeoftlvs);
+ retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
+ if (retval != 0)
+ printk("lec.c: lane2_associate_req() failed\n");
+ /*
+ * If the previous association has changed we must
+ * somehow notify other LANE entities about the change
+ */
+ return (1);
}
/*
* LANE2: 3.1.5, LE_ASSOCIATE.indication
*
*/
-static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr,
- u8 *tlvs, u32 sizeoftlvs)
+static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
+ u8 *tlvs, u32 sizeoftlvs)
{
#if 0
- int i = 0;
+ int i = 0;
#endif
struct lec_priv *priv = (struct lec_priv *)dev->priv;
-#if 0 /* Why have the TLVs in LE_ARP entries since we do not use them? When you
- uncomment this code, make sure the TLVs get freed when entry is killed */
- struct lec_arp_table *entry = lec_arp_find(priv, mac_addr);
+#if 0 /*
+ * Why have the TLVs in LE_ARP entries
+ * since we do not use them? When you
+ * uncomment this code, make sure the
+ * TLVs get freed when entry is killed
+ */
+ struct lec_arp_table *entry = lec_arp_find(priv, mac_addr);
- if (entry == NULL)
- return; /* should not happen */
+ if (entry == NULL)
+ return; /* should not happen */
- kfree(entry->tlvs);
+ kfree(entry->tlvs);
- entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
- if (entry->tlvs == NULL)
- return;
+ entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+ if (entry->tlvs == NULL)
+ return;
- entry->sizeoftlvs = sizeoftlvs;
- memcpy(entry->tlvs, tlvs, sizeoftlvs);
+ entry->sizeoftlvs = sizeoftlvs;
+ memcpy(entry->tlvs, tlvs, sizeoftlvs);
#endif
#if 0
- printk("lec.c: lane2_associate_ind()\n");
- printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs);
- while (i < sizeoftlvs)
- printk("%02x ", tlvs[i++]);
-
- printk("\n");
+ printk("lec.c: lane2_associate_ind()\n");
+ printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs);
+ while (i < sizeoftlvs)
+ printk("%02x ", tlvs[i++]);
+
+ printk("\n");
#endif
- /* tell MPOA about the TLVs we saw */
- if (priv->lane2_ops && priv->lane2_ops->associate_indicator) {
- priv->lane2_ops->associate_indicator(dev, mac_addr,
- tlvs, sizeoftlvs);
- }
- return;
+ /* tell MPOA about the TLVs we saw */
+ if (priv->lane2_ops && priv->lane2_ops->associate_indicator) {
+ priv->lane2_ops->associate_indicator(dev, mac_addr,
+ tlvs, sizeoftlvs);
+ }
+ return;
}
/*
@@ -1396,7 +1438,6 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr,
*
* lec_arpc.c was added here when making
* lane client modular. October 1997
- *
*/
#include <linux/types.h>
@@ -1407,7 +1448,6 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr,
#include <linux/inetdevice.h>
#include <net/route.h>
-
#if 0
#define DPRINTK(format,args...)
/*
@@ -1418,7 +1458,7 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr,
#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
-static void lec_arp_check_expire(unsigned long data);
+static void lec_arp_check_expire(void *data);
static void lec_arp_expire_arp(unsigned long data);
/*
@@ -1430,475 +1470,397 @@ static void lec_arp_expire_arp(unsigned long data);
/*
* Initialization of arp-cache
*/
-static void
-lec_arp_init(struct lec_priv *priv)
+static void lec_arp_init(struct lec_priv *priv)
{
- unsigned short i;
+ unsigned short i;
- for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- priv->lec_arp_tables[i] = NULL;
- }
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ INIT_HLIST_HEAD(&priv->lec_arp_tables[i]);
+ }
+ INIT_HLIST_HEAD(&priv->lec_arp_empty_ones);
+ INIT_HLIST_HEAD(&priv->lec_no_forward);
+ INIT_HLIST_HEAD(&priv->mcast_fwds);
spin_lock_init(&priv->lec_arp_lock);
- init_timer(&priv->lec_arp_timer);
- priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL;
- priv->lec_arp_timer.data = (unsigned long)priv;
- priv->lec_arp_timer.function = lec_arp_check_expire;
- add_timer(&priv->lec_arp_timer);
+ INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv);
+ schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL);
}
-static void
-lec_arp_clear_vccs(struct lec_arp_table *entry)
+static void lec_arp_clear_vccs(struct lec_arp_table *entry)
{
- if (entry->vcc) {
+ if (entry->vcc) {
struct atm_vcc *vcc = entry->vcc;
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
- struct net_device *dev = (struct net_device*) vcc->proto_data;
+ struct net_device *dev = (struct net_device *)vcc->proto_data;
- vcc->pop = vpriv->old_pop;
+ vcc->pop = vpriv->old_pop;
if (vpriv->xoff)
netif_wake_queue(dev);
kfree(vpriv);
vcc->user_back = NULL;
- vcc->push = entry->old_push;
+ vcc->push = entry->old_push;
vcc_release_async(vcc, -EPIPE);
- vcc = NULL;
- }
- if (entry->recv_vcc) {
- entry->recv_vcc->push = entry->old_recv_push;
+ entry->vcc = NULL;
+ }
+ if (entry->recv_vcc) {
+ entry->recv_vcc->push = entry->old_recv_push;
vcc_release_async(entry->recv_vcc, -EPIPE);
- entry->recv_vcc = NULL;
- }
+ entry->recv_vcc = NULL;
+ }
}
/*
* Insert entry to lec_arp_table
* LANE2: Add to the end of the list to satisfy 8.1.13
*/
-static inline void
-lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add)
+static inline void
+lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry)
{
- unsigned short place;
- struct lec_arp_table *tmp;
-
- place = HASH(to_add->mac_addr[ETH_ALEN-1]);
- tmp = priv->lec_arp_tables[place];
- to_add->next = NULL;
- if (tmp == NULL)
- priv->lec_arp_tables[place] = to_add;
-
- else { /* add to the end */
- while (tmp->next)
- tmp = tmp->next;
- tmp->next = to_add;
- }
-
- DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
- 0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1],
- 0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3],
- 0xff&to_add->mac_addr[4], 0xff&to_add->mac_addr[5]);
+ struct hlist_head *tmp;
+
+ tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])];
+ hlist_add_head(&entry->next, tmp);
+
+ DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+ 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1],
+ 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3],
+ 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]);
}
/*
* Remove entry from lec_arp_table
*/
-static int
-lec_arp_remove(struct lec_priv *priv,
- struct lec_arp_table *to_remove)
+static int
+lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove)
{
- unsigned short place;
- struct lec_arp_table *tmp;
- int remove_vcc=1;
-
- if (!to_remove) {
- return -1;
- }
- place = HASH(to_remove->mac_addr[ETH_ALEN-1]);
- tmp = priv->lec_arp_tables[place];
- if (tmp == to_remove) {
- priv->lec_arp_tables[place] = tmp->next;
- } else {
- while(tmp && tmp->next != to_remove) {
- tmp = tmp->next;
- }
- if (!tmp) {/* Entry was not found */
- return -1;
- }
- }
- tmp->next = to_remove->next;
- del_timer(&to_remove->timer);
-
- /* If this is the only MAC connected to this VCC, also tear down
- the VCC */
- if (to_remove->status >= ESI_FLUSH_PENDING) {
- /*
- * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
- */
- for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) {
- for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) {
- if (memcmp(tmp->atm_addr, to_remove->atm_addr,
- ATM_ESA_LEN)==0) {
- remove_vcc=0;
- break;
- }
- }
- }
- if (remove_vcc)
- lec_arp_clear_vccs(to_remove);
- }
- skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */
-
- DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
- 0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1],
- 0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3],
- 0xff&to_remove->mac_addr[4], 0xff&to_remove->mac_addr[5]);
- return 0;
+ struct hlist_node *node;
+ struct lec_arp_table *entry;
+ int i, remove_vcc = 1;
+
+ if (!to_remove) {
+ return -1;
+ }
+
+ hlist_del(&to_remove->next);
+ del_timer(&to_remove->timer);
+
+ /* If this is the only MAC connected to this VCC, also tear down the VCC */
+ if (to_remove->status >= ESI_FLUSH_PENDING) {
+ /*
+ * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
+ */
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) {
+ if (memcmp(to_remove->atm_addr,
+ entry->atm_addr, ATM_ESA_LEN) == 0) {
+ remove_vcc = 0;
+ break;
+ }
+ }
+ }
+ if (remove_vcc)
+ lec_arp_clear_vccs(to_remove);
+ }
+ skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */
+
+ DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+ 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1],
+ 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3],
+ 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]);
+ return 0;
}
#if DEBUG_ARP_TABLE
-static char*
-get_status_string(unsigned char st)
+static char *get_status_string(unsigned char st)
{
- switch(st) {
- case ESI_UNKNOWN:
- return "ESI_UNKNOWN";
- case ESI_ARP_PENDING:
- return "ESI_ARP_PENDING";
- case ESI_VC_PENDING:
- return "ESI_VC_PENDING";
- case ESI_FLUSH_PENDING:
- return "ESI_FLUSH_PENDING";
- case ESI_FORWARD_DIRECT:
- return "ESI_FORWARD_DIRECT";
- default:
- return "<UNKNOWN>";
- }
+ switch (st) {
+ case ESI_UNKNOWN:
+ return "ESI_UNKNOWN";
+ case ESI_ARP_PENDING:
+ return "ESI_ARP_PENDING";
+ case ESI_VC_PENDING:
+ return "ESI_VC_PENDING";
+ case ESI_FLUSH_PENDING:
+ return "ESI_FLUSH_PENDING";
+ case ESI_FORWARD_DIRECT:
+ return "ESI_FORWARD_DIRECT";
+ default:
+ return "<UNKNOWN>";
+ }
}
-#endif
-static void
-dump_arp_table(struct lec_priv *priv)
+static void dump_arp_table(struct lec_priv *priv)
{
-#if DEBUG_ARP_TABLE
- int i,j, offset;
- struct lec_arp_table *rulla;
- char buf[1024];
- struct lec_arp_table **lec_arp_tables =
- (struct lec_arp_table **)priv->lec_arp_tables;
- struct lec_arp_table *lec_arp_empty_ones =
- (struct lec_arp_table *)priv->lec_arp_empty_ones;
- struct lec_arp_table *lec_no_forward =
- (struct lec_arp_table *)priv->lec_no_forward;
- struct lec_arp_table *mcast_fwds = priv->mcast_fwds;
-
-
- printk("Dump %p:\n",priv);
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- rulla = lec_arp_tables[i];
- offset = 0;
- offset += sprintf(buf,"%d: %p\n",i, rulla);
- while (rulla) {
- offset += sprintf(buf+offset,"Mac:");
- for(j=0;j<ETH_ALEN;j++) {
- offset+=sprintf(buf+offset,
- "%2.2x ",
- rulla->mac_addr[j]&0xff);
- }
- offset +=sprintf(buf+offset,"Atm:");
- for(j=0;j<ATM_ESA_LEN;j++) {
- offset+=sprintf(buf+offset,
- "%2.2x ",
- rulla->atm_addr[j]&0xff);
- }
- offset+=sprintf(buf+offset,
- "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
- rulla->vcc?rulla->vcc->vpi:0,
- rulla->vcc?rulla->vcc->vci:0,
- rulla->recv_vcc?rulla->recv_vcc->vpi:0,
- rulla->recv_vcc?rulla->recv_vcc->vci:0,
- rulla->last_used,
- rulla->timestamp, rulla->no_tries);
- offset+=sprintf(buf+offset,
- "Flags:%x, Packets_flooded:%x, Status: %s ",
- rulla->flags, rulla->packets_flooded,
- get_status_string(rulla->status));
- offset+=sprintf(buf+offset,"->%p\n",rulla->next);
- rulla = rulla->next;
- }
- printk("%s",buf);
- }
- rulla = lec_no_forward;
- if (rulla)
- printk("No forward\n");
- while(rulla) {
- offset=0;
- offset += sprintf(buf+offset,"Mac:");
- for(j=0;j<ETH_ALEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->mac_addr[j]&0xff);
- }
- offset +=sprintf(buf+offset,"Atm:");
- for(j=0;j<ATM_ESA_LEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->atm_addr[j]&0xff);
- }
- offset+=sprintf(buf+offset,
- "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
- rulla->vcc?rulla->vcc->vpi:0,
- rulla->vcc?rulla->vcc->vci:0,
- rulla->recv_vcc?rulla->recv_vcc->vpi:0,
- rulla->recv_vcc?rulla->recv_vcc->vci:0,
- rulla->last_used,
- rulla->timestamp, rulla->no_tries);
- offset+=sprintf(buf+offset,
- "Flags:%x, Packets_flooded:%x, Status: %s ",
- rulla->flags, rulla->packets_flooded,
- get_status_string(rulla->status));
- offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
- rulla = rulla->next;
- printk("%s",buf);
- }
- rulla = lec_arp_empty_ones;
- if (rulla)
- printk("Empty ones\n");
- while(rulla) {
- offset=0;
- offset += sprintf(buf+offset,"Mac:");
- for(j=0;j<ETH_ALEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->mac_addr[j]&0xff);
- }
- offset +=sprintf(buf+offset,"Atm:");
- for(j=0;j<ATM_ESA_LEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->atm_addr[j]&0xff);
- }
- offset+=sprintf(buf+offset,
- "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
- rulla->vcc?rulla->vcc->vpi:0,
- rulla->vcc?rulla->vcc->vci:0,
- rulla->recv_vcc?rulla->recv_vcc->vpi:0,
- rulla->recv_vcc?rulla->recv_vcc->vci:0,
- rulla->last_used,
- rulla->timestamp, rulla->no_tries);
- offset+=sprintf(buf+offset,
- "Flags:%x, Packets_flooded:%x, Status: %s ",
- rulla->flags, rulla->packets_flooded,
- get_status_string(rulla->status));
- offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
- rulla = rulla->next;
- printk("%s",buf);
- }
-
- rulla = mcast_fwds;
- if (rulla)
- printk("Multicast Forward VCCs\n");
- while(rulla) {
- offset=0;
- offset += sprintf(buf+offset,"Mac:");
- for(j=0;j<ETH_ALEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->mac_addr[j]&0xff);
- }
- offset +=sprintf(buf+offset,"Atm:");
- for(j=0;j<ATM_ESA_LEN;j++) {
- offset+=sprintf(buf+offset,"%2.2x ",
- rulla->atm_addr[j]&0xff);
- }
- offset+=sprintf(buf+offset,
- "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
- rulla->vcc?rulla->vcc->vpi:0,
- rulla->vcc?rulla->vcc->vci:0,
- rulla->recv_vcc?rulla->recv_vcc->vpi:0,
- rulla->recv_vcc?rulla->recv_vcc->vci:0,
- rulla->last_used,
- rulla->timestamp, rulla->no_tries);
- offset+=sprintf(buf+offset,
- "Flags:%x, Packets_flooded:%x, Status: %s ",
- rulla->flags, rulla->packets_flooded,
- get_status_string(rulla->status));
- offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
- rulla = rulla->next;
- printk("%s",buf);
- }
+ struct hlist_node *node;
+ struct lec_arp_table *rulla;
+ char buf[256];
+ int i, j, offset;
+
+ printk("Dump %p:\n", priv);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry(rulla, node, &priv->lec_arp_tables[i], next) {
+ offset = 0;
+ offset += sprintf(buf, "%d: %p\n", i, rulla);
+ offset += sprintf(buf + offset, "Mac:");
+ for (j = 0; j < ETH_ALEN; j++) {
+ offset += sprintf(buf + offset,
+ "%2.2x ",
+ rulla->mac_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset, "Atm:");
+ for (j = 0; j < ATM_ESA_LEN; j++) {
+ offset += sprintf(buf + offset,
+ "%2.2x ",
+ rulla->atm_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset,
+ "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+ rulla->vcc ? rulla->vcc->vpi : 0,
+ rulla->vcc ? rulla->vcc->vci : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->
+ vpi : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->
+ vci : 0, rulla->last_used,
+ rulla->timestamp, rulla->no_tries);
+ offset +=
+ sprintf(buf + offset,
+ "Flags:%x, Packets_flooded:%x, Status: %s ",
+ rulla->flags, rulla->packets_flooded,
+ get_status_string(rulla->status));
+ printk("%s\n", buf);
+ }
+ }
+
+ if (!hlist_empty(&priv->lec_no_forward))
+ printk("No forward\n");
+ hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) {
+ offset = 0;
+ offset += sprintf(buf + offset, "Mac:");
+ for (j = 0; j < ETH_ALEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->mac_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset, "Atm:");
+ for (j = 0; j < ATM_ESA_LEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->atm_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset,
+ "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+ rulla->vcc ? rulla->vcc->vpi : 0,
+ rulla->vcc ? rulla->vcc->vci : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vpi : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vci : 0,
+ rulla->last_used,
+ rulla->timestamp, rulla->no_tries);
+ offset += sprintf(buf + offset,
+ "Flags:%x, Packets_flooded:%x, Status: %s ",
+ rulla->flags, rulla->packets_flooded,
+ get_status_string(rulla->status));
+ printk("%s\n", buf);
+ }
+
+ if (!hlist_empty(&priv->lec_arp_empty_ones))
+ printk("Empty ones\n");
+ hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) {
+ offset = 0;
+ offset += sprintf(buf + offset, "Mac:");
+ for (j = 0; j < ETH_ALEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->mac_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset, "Atm:");
+ for (j = 0; j < ATM_ESA_LEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->atm_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset,
+ "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+ rulla->vcc ? rulla->vcc->vpi : 0,
+ rulla->vcc ? rulla->vcc->vci : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vpi : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vci : 0,
+ rulla->last_used,
+ rulla->timestamp, rulla->no_tries);
+ offset += sprintf(buf + offset,
+ "Flags:%x, Packets_flooded:%x, Status: %s ",
+ rulla->flags, rulla->packets_flooded,
+ get_status_string(rulla->status));
+ printk("%s", buf);
+ }
+
+ if (!hlist_empty(&priv->mcast_fwds))
+ printk("Multicast Forward VCCs\n");
+ hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) {
+ offset = 0;
+ offset += sprintf(buf + offset, "Mac:");
+ for (j = 0; j < ETH_ALEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->mac_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset, "Atm:");
+ for (j = 0; j < ATM_ESA_LEN; j++) {
+ offset += sprintf(buf + offset, "%2.2x ",
+ rulla->atm_addr[j] & 0xff);
+ }
+ offset += sprintf(buf + offset,
+ "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+ rulla->vcc ? rulla->vcc->vpi : 0,
+ rulla->vcc ? rulla->vcc->vci : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vpi : 0,
+ rulla->recv_vcc ? rulla->recv_vcc->vci : 0,
+ rulla->last_used,
+ rulla->timestamp, rulla->no_tries);
+ offset += sprintf(buf + offset,
+ "Flags:%x, Packets_flooded:%x, Status: %s ",
+ rulla->flags, rulla->packets_flooded,
+ get_status_string(rulla->status));
+ printk("%s\n", buf);
+ }
-#endif
}
+#else
+#define dump_arp_table(priv) do { } while (0)
+#endif
/*
* Destruction of arp-cache
*/
-static void
-lec_arp_destroy(struct lec_priv *priv)
+static void lec_arp_destroy(struct lec_priv *priv)
{
unsigned long flags;
- struct lec_arp_table *entry, *next;
- int i;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry;
+ int i;
+
+ cancel_rearming_delayed_work(&priv->lec_arp_work);
- del_timer_sync(&priv->lec_arp_timer);
-
- /*
- * Remove all entries
- */
+ /*
+ * Remove all entries
+ */
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) {
- next = entry->next;
- lec_arp_remove(priv, entry);
- kfree(entry);
- }
- }
- entry = priv->lec_arp_empty_ones;
- while(entry) {
- next = entry->next;
- del_timer_sync(&entry->timer);
- lec_arp_clear_vccs(entry);
- kfree(entry);
- entry = next;
- }
- priv->lec_arp_empty_ones = NULL;
- entry = priv->lec_no_forward;
- while(entry) {
- next = entry->next;
- del_timer_sync(&entry->timer);
- lec_arp_clear_vccs(entry);
- kfree(entry);
- entry = next;
- }
- priv->lec_no_forward = NULL;
- entry = priv->mcast_fwds;
- while(entry) {
- next = entry->next;
- /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
- lec_arp_clear_vccs(entry);
- kfree(entry);
- entry = next;
- }
- priv->mcast_fwds = NULL;
- priv->mcast_vcc = NULL;
- memset(priv->lec_arp_tables, 0,
- sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) {
+ lec_arp_remove(priv, entry);
+ lec_arp_put(entry);
+ }
+ INIT_HLIST_HEAD(&priv->lec_arp_tables[i]);
+ }
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) {
+ del_timer_sync(&entry->timer);
+ lec_arp_clear_vccs(entry);
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ INIT_HLIST_HEAD(&priv->lec_arp_empty_ones);
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) {
+ del_timer_sync(&entry->timer);
+ lec_arp_clear_vccs(entry);
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ INIT_HLIST_HEAD(&priv->lec_no_forward);
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+ /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
+ lec_arp_clear_vccs(entry);
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ INIT_HLIST_HEAD(&priv->mcast_fwds);
+ priv->mcast_vcc = NULL;
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
-
/*
* Find entry by mac_address
*/
-static struct lec_arp_table*
-lec_arp_find(struct lec_priv *priv,
- unsigned char *mac_addr)
+static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
+ unsigned char *mac_addr)
{
- unsigned short place;
- struct lec_arp_table *to_return;
-
- DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
- mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff,
- mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff);
- place = HASH(mac_addr[ETH_ALEN-1]);
-
- to_return = priv->lec_arp_tables[place];
- while(to_return) {
- if (!compare_ether_addr(mac_addr, to_return->mac_addr)) {
- return to_return;
- }
- to_return = to_return->next;
- }
- return NULL;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ struct lec_arp_table *entry;
+
+ DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+ mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff,
+ mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff);
+
+ head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])];
+ hlist_for_each_entry(entry, node, head, next) {
+ if (!compare_ether_addr(mac_addr, entry->mac_addr)) {
+ return entry;
+ }
+ }
+ return NULL;
}
-static struct lec_arp_table*
-make_entry(struct lec_priv *priv, unsigned char *mac_addr)
+static struct lec_arp_table *make_entry(struct lec_priv *priv,
+ unsigned char *mac_addr)
{
- struct lec_arp_table *to_return;
-
- to_return = kmalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
- if (!to_return) {
- printk("LEC: Arp entry kmalloc failed\n");
- return NULL;
- }
- memset(to_return, 0, sizeof(struct lec_arp_table));
- memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
- init_timer(&to_return->timer);
- to_return->timer.function = lec_arp_expire_arp;
- to_return->timer.data = (unsigned long) to_return;
- to_return->last_used = jiffies;
- to_return->priv = priv;
- skb_queue_head_init(&to_return->tx_wait);
- return to_return;
+ struct lec_arp_table *to_return;
+
+ to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
+ if (!to_return) {
+ printk("LEC: Arp entry kmalloc failed\n");
+ return NULL;
+ }
+ memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
+ INIT_HLIST_NODE(&to_return->next);
+ init_timer(&to_return->timer);
+ to_return->timer.function = lec_arp_expire_arp;
+ to_return->timer.data = (unsigned long)to_return;
+ to_return->last_used = jiffies;
+ to_return->priv = priv;
+ skb_queue_head_init(&to_return->tx_wait);
+ atomic_set(&to_return->usage, 1);
+ return to_return;
}
-/*
- *
- * Arp sent timer expired
- *
- */
-static void
-lec_arp_expire_arp(unsigned long data)
+/* Arp sent timer expired */
+static void lec_arp_expire_arp(unsigned long data)
{
- struct lec_arp_table *entry;
-
- entry = (struct lec_arp_table *)data;
-
- DPRINTK("lec_arp_expire_arp\n");
- if (entry->status == ESI_ARP_PENDING) {
- if (entry->no_tries <= entry->priv->max_retry_count) {
- if (entry->is_rdesc)
- send_to_lecd(entry->priv, l_rdesc_arp_xmt, entry->mac_addr, NULL, NULL);
- else
- send_to_lecd(entry->priv, l_arp_xmt, entry->mac_addr, NULL, NULL);
- entry->no_tries++;
- }
- mod_timer(&entry->timer, jiffies + (1*HZ));
- }
+ struct lec_arp_table *entry;
+
+ entry = (struct lec_arp_table *)data;
+
+ DPRINTK("lec_arp_expire_arp\n");
+ if (entry->status == ESI_ARP_PENDING) {
+ if (entry->no_tries <= entry->priv->max_retry_count) {
+ if (entry->is_rdesc)
+ send_to_lecd(entry->priv, l_rdesc_arp_xmt,
+ entry->mac_addr, NULL, NULL);
+ else
+ send_to_lecd(entry->priv, l_arp_xmt,
+ entry->mac_addr, NULL, NULL);
+ entry->no_tries++;
+ }
+ mod_timer(&entry->timer, jiffies + (1 * HZ));
+ }
}
-/*
- *
- * Unknown/unused vcc expire, remove associated entry
- *
- */
-static void
-lec_arp_expire_vcc(unsigned long data)
+/* Unknown/unused vcc expire, remove associated entry */
+static void lec_arp_expire_vcc(unsigned long data)
{
unsigned long flags;
- struct lec_arp_table *to_remove = (struct lec_arp_table*)data;
- struct lec_priv *priv = (struct lec_priv *)to_remove->priv;
- struct lec_arp_table *entry = NULL;
+ struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+ struct lec_priv *priv = (struct lec_priv *)to_remove->priv;
- del_timer(&to_remove->timer);
+ del_timer(&to_remove->timer);
- DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n",
- to_remove, priv,
- to_remove->vcc?to_remove->recv_vcc->vpi:0,
- to_remove->vcc?to_remove->recv_vcc->vci:0);
- DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward);
+ DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n",
+ to_remove, priv,
+ to_remove->vcc ? to_remove->recv_vcc->vpi : 0,
+ to_remove->vcc ? to_remove->recv_vcc->vci : 0);
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- if (to_remove == priv->lec_arp_empty_ones)
- priv->lec_arp_empty_ones = to_remove->next;
- else {
- entry = priv->lec_arp_empty_ones;
- while (entry && entry->next != to_remove)
- entry = entry->next;
- if (entry)
- entry->next = to_remove->next;
- }
- if (!entry) {
- if (to_remove == priv->lec_no_forward) {
- priv->lec_no_forward = to_remove->next;
- } else {
- entry = priv->lec_no_forward;
- while (entry && entry->next != to_remove)
- entry = entry->next;
- if (entry)
- entry->next = to_remove->next;
- }
- }
+ hlist_del(&to_remove->next);
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- lec_arp_clear_vccs(to_remove);
- kfree(to_remove);
+ lec_arp_clear_vccs(to_remove);
+ lec_arp_put(to_remove);
}
/*
@@ -1917,158 +1879,171 @@ lec_arp_expire_vcc(unsigned long data)
* to ESI_FORWARD_DIRECT. This causes the flush period to end
* regardless of the progress of the flush protocol.
*/
-static void
-lec_arp_check_expire(unsigned long data)
+static void lec_arp_check_expire(void *data)
{
unsigned long flags;
- struct lec_priv *priv = (struct lec_priv *)data;
- struct lec_arp_table *entry, *next;
- unsigned long now;
- unsigned long time_to_check;
- int i;
-
- DPRINTK("lec_arp_check_expire %p\n",priv);
- DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones,
- priv->lec_no_forward);
+ struct lec_priv *priv = data;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry;
+ unsigned long now;
+ unsigned long time_to_check;
+ int i;
+
+ DPRINTK("lec_arp_check_expire %p\n", priv);
now = jiffies;
+restart:
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for(entry = priv->lec_arp_tables[i]; entry != NULL; ) {
- if ((entry->flags) & LEC_REMOTE_FLAG &&
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) {
+ if ((entry->flags) & LEC_REMOTE_FLAG &&
priv->topology_change)
time_to_check = priv->forward_delay_time;
else
time_to_check = priv->aging_time;
DPRINTK("About to expire: %lx - %lx > %lx\n",
- now,entry->last_used, time_to_check);
- if( time_after(now, entry->last_used+
- time_to_check) &&
- !(entry->flags & LEC_PERMANENT_FLAG) &&
- !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */
+ now, entry->last_used, time_to_check);
+ if (time_after(now, entry->last_used + time_to_check)
+ && !(entry->flags & LEC_PERMANENT_FLAG)
+ && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */
/* Remove entry */
DPRINTK("LEC:Entry timed out\n");
- next = entry->next;
lec_arp_remove(priv, entry);
- kfree(entry);
- entry = next;
+ lec_arp_put(entry);
} else {
/* Something else */
if ((entry->status == ESI_VC_PENDING ||
- entry->status == ESI_ARP_PENDING)
+ entry->status == ESI_ARP_PENDING)
&& time_after_eq(now,
- entry->timestamp +
- priv->max_unknown_frame_time)) {
+ entry->timestamp +
+ priv->
+ max_unknown_frame_time)) {
entry->timestamp = jiffies;
entry->packets_flooded = 0;
if (entry->status == ESI_VC_PENDING)
- send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL);
+ send_to_lecd(priv, l_svc_setup,
+ entry->mac_addr,
+ entry->atm_addr,
+ NULL);
}
- if (entry->status == ESI_FLUSH_PENDING
- &&
- time_after_eq(now, entry->timestamp+
- priv->path_switching_delay)) {
+ if (entry->status == ESI_FLUSH_PENDING
+ &&
+ time_after_eq(now, entry->timestamp +
+ priv->path_switching_delay)) {
struct sk_buff *skb;
+ struct atm_vcc *vcc = entry->vcc;
+ lec_arp_hold(entry);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
- lec_send(entry->vcc, skb, entry->priv);
+ lec_send(vcc, skb, entry->priv);
entry->last_used = jiffies;
- entry->status =
- ESI_FORWARD_DIRECT;
+ entry->status = ESI_FORWARD_DIRECT;
+ lec_arp_put(entry);
+ goto restart;
}
- entry = entry->next;
}
}
}
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL);
+ schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL);
}
+
/*
* Try to find vcc where mac_address is attached.
*
*/
-static struct atm_vcc*
-lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find,
- int is_rdesc, struct lec_arp_table **ret_entry)
+static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
+ unsigned char *mac_to_find, int is_rdesc,
+ struct lec_arp_table **ret_entry)
{
unsigned long flags;
- struct lec_arp_table *entry;
+ struct lec_arp_table *entry;
struct atm_vcc *found;
- if (mac_to_find[0] & 0x01) {
- switch (priv->lane_version) {
- case 1:
- return priv->mcast_vcc;
- break;
- case 2: /* LANE2 wants arp for multicast addresses */
- if (!compare_ether_addr(mac_to_find, bus_mac))
- return priv->mcast_vcc;
- break;
- default:
- break;
- }
- }
+ if (mac_to_find[0] & 0x01) {
+ switch (priv->lane_version) {
+ case 1:
+ return priv->mcast_vcc;
+ break;
+ case 2: /* LANE2 wants arp for multicast addresses */
+ if (!compare_ether_addr(mac_to_find, bus_mac))
+ return priv->mcast_vcc;
+ break;
+ default:
+ break;
+ }
+ }
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- entry = lec_arp_find(priv, mac_to_find);
-
- if (entry) {
- if (entry->status == ESI_FORWARD_DIRECT) {
- /* Connection Ok */
- entry->last_used = jiffies;
- *ret_entry = entry;
- found = entry->vcc;
+ entry = lec_arp_find(priv, mac_to_find);
+
+ if (entry) {
+ if (entry->status == ESI_FORWARD_DIRECT) {
+ /* Connection Ok */
+ entry->last_used = jiffies;
+ lec_arp_hold(entry);
+ *ret_entry = entry;
+ found = entry->vcc;
goto out;
- }
- /* If the LE_ARP cache entry is still pending, reset count to 0
+ }
+ /*
+ * If the LE_ARP cache entry is still pending, reset count to 0
* so another LE_ARP request can be made for this frame.
*/
if (entry->status == ESI_ARP_PENDING) {
entry->no_tries = 0;
}
- /* Data direct VC not yet set up, check to see if the unknown
- frame count is greater than the limit. If the limit has
- not been reached, allow the caller to send packet to
- BUS. */
- if (entry->status != ESI_FLUSH_PENDING &&
- entry->packets_flooded<priv->maximum_unknown_frame_count) {
- entry->packets_flooded++;
- DPRINTK("LEC_ARP: Flooding..\n");
- found = priv->mcast_vcc;
+ /*
+ * Data direct VC not yet set up, check to see if the unknown
+ * frame count is greater than the limit. If the limit has
+ * not been reached, allow the caller to send packet to
+ * BUS.
+ */
+ if (entry->status != ESI_FLUSH_PENDING &&
+ entry->packets_flooded <
+ priv->maximum_unknown_frame_count) {
+ entry->packets_flooded++;
+ DPRINTK("LEC_ARP: Flooding..\n");
+ found = priv->mcast_vcc;
goto out;
- }
- /* We got here because entry->status == ESI_FLUSH_PENDING
+ }
+ /*
+ * We got here because entry->status == ESI_FLUSH_PENDING
* or BUS flood limit was reached for an entry which is
* in ESI_ARP_PENDING or ESI_VC_PENDING state.
*/
- *ret_entry = entry;
- DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc);
- found = NULL;
- } else {
- /* No matching entry was found */
- entry = make_entry(priv, mac_to_find);
- DPRINTK("LEC_ARP: Making entry\n");
- if (!entry) {
- found = priv->mcast_vcc;
+ lec_arp_hold(entry);
+ *ret_entry = entry;
+ DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status,
+ entry->vcc);
+ found = NULL;
+ } else {
+ /* No matching entry was found */
+ entry = make_entry(priv, mac_to_find);
+ DPRINTK("LEC_ARP: Making entry\n");
+ if (!entry) {
+ found = priv->mcast_vcc;
goto out;
- }
- lec_arp_add(priv, entry);
- /* We want arp-request(s) to be sent */
- entry->packets_flooded =1;
- entry->status = ESI_ARP_PENDING;
- entry->no_tries = 1;
- entry->last_used = entry->timestamp = jiffies;
- entry->is_rdesc = is_rdesc;
- if (entry->is_rdesc)
- send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, NULL);
- else
- send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
- entry->timer.expires = jiffies + (1*HZ);
- entry->timer.function = lec_arp_expire_arp;
- add_timer(&entry->timer);
- found = priv->mcast_vcc;
- }
+ }
+ lec_arp_add(priv, entry);
+ /* We want arp-request(s) to be sent */
+ entry->packets_flooded = 1;
+ entry->status = ESI_ARP_PENDING;
+ entry->no_tries = 1;
+ entry->last_used = entry->timestamp = jiffies;
+ entry->is_rdesc = is_rdesc;
+ if (entry->is_rdesc)
+ send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL,
+ NULL);
+ else
+ send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
+ entry->timer.expires = jiffies + (1 * HZ);
+ entry->timer.function = lec_arp_expire_arp;
+ add_timer(&entry->timer);
+ found = priv->mcast_vcc;
+ }
out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
@@ -2076,30 +2051,30 @@ out:
}
static int
-lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
- unsigned long permanent)
+lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+ unsigned long permanent)
{
unsigned long flags;
- struct lec_arp_table *entry, *next;
- int i;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry;
+ int i;
- DPRINTK("lec_addr_delete\n");
+ DPRINTK("lec_addr_delete\n");
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) {
- next = entry->next;
- if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)
- && (permanent ||
- !(entry->flags & LEC_PERMANENT_FLAG))) {
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) {
+ if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)
+ && (permanent ||
+ !(entry->flags & LEC_PERMANENT_FLAG))) {
lec_arp_remove(priv, entry);
- kfree(entry);
- }
+ lec_arp_put(entry);
+ }
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- return 0;
- }
- }
+ return 0;
+ }
+ }
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- return -1;
+ return -1;
}
/*
@@ -2107,109 +2082,98 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
*/
static void
lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
- unsigned char *atm_addr, unsigned long remoteflag,
- unsigned int targetless_le_arp)
+ unsigned char *atm_addr, unsigned long remoteflag,
+ unsigned int targetless_le_arp)
{
unsigned long flags;
- struct lec_arp_table *entry, *tmp;
- int i;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry, *tmp;
+ int i;
- DPRINTK("lec:%s", (targetless_le_arp) ? "targetless ": " ");
- DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
- mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3],
- mac_addr[4],mac_addr[5]);
+ DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " ");
+ DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+ mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+ mac_addr[4], mac_addr[5]);
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- entry = lec_arp_find(priv, mac_addr);
- if (entry == NULL && targetless_le_arp)
- goto out; /* LANE2: ignore targetless LE_ARPs for which
- * we have no entry in the cache. 7.1.30
- */
- if (priv->lec_arp_empty_ones) {
- entry = priv->lec_arp_empty_ones;
- if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) {
- priv->lec_arp_empty_ones = entry->next;
- } else {
- while(entry->next && memcmp(entry->next->atm_addr,
- atm_addr, ATM_ESA_LEN))
- entry = entry->next;
- if (entry->next) {
- tmp = entry;
- entry = entry->next;
- tmp->next = entry->next;
- } else
- entry = NULL;
-
- }
- if (entry) {
- del_timer(&entry->timer);
- tmp = lec_arp_find(priv, mac_addr);
- if (tmp) {
- del_timer(&tmp->timer);
- tmp->status = ESI_FORWARD_DIRECT;
- memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN);
- tmp->vcc = entry->vcc;
- tmp->old_push = entry->old_push;
- tmp->last_used = jiffies;
- del_timer(&entry->timer);
- kfree(entry);
- entry=tmp;
- } else {
- entry->status = ESI_FORWARD_DIRECT;
- memcpy(entry->mac_addr, mac_addr, ETH_ALEN);
- entry->last_used = jiffies;
- lec_arp_add(priv, entry);
- }
- if (remoteflag)
- entry->flags|=LEC_REMOTE_FLAG;
- else
- entry->flags&=~LEC_REMOTE_FLAG;
- DPRINTK("After update\n");
- dump_arp_table(priv);
- goto out;
- }
- }
- entry = lec_arp_find(priv, mac_addr);
- if (!entry) {
- entry = make_entry(priv, mac_addr);
- if (!entry)
+ entry = lec_arp_find(priv, mac_addr);
+ if (entry == NULL && targetless_le_arp)
+ goto out; /*
+ * LANE2: ignore targetless LE_ARPs for which
+ * we have no entry in the cache. 7.1.30
+ */
+ if (!hlist_empty(&priv->lec_arp_empty_ones)) {
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) {
+ if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) {
+ hlist_del(&entry->next);
+ del_timer(&entry->timer);
+ tmp = lec_arp_find(priv, mac_addr);
+ if (tmp) {
+ del_timer(&tmp->timer);
+ tmp->status = ESI_FORWARD_DIRECT;
+ memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN);
+ tmp->vcc = entry->vcc;
+ tmp->old_push = entry->old_push;
+ tmp->last_used = jiffies;
+ del_timer(&entry->timer);
+ lec_arp_put(entry);
+ entry = tmp;
+ } else {
+ entry->status = ESI_FORWARD_DIRECT;
+ memcpy(entry->mac_addr, mac_addr, ETH_ALEN);
+ entry->last_used = jiffies;
+ lec_arp_add(priv, entry);
+ }
+ if (remoteflag)
+ entry->flags |= LEC_REMOTE_FLAG;
+ else
+ entry->flags &= ~LEC_REMOTE_FLAG;
+ DPRINTK("After update\n");
+ dump_arp_table(priv);
+ goto out;
+ }
+ }
+ }
+
+ entry = lec_arp_find(priv, mac_addr);
+ if (!entry) {
+ entry = make_entry(priv, mac_addr);
+ if (!entry)
goto out;
- entry->status = ESI_UNKNOWN;
- lec_arp_add(priv, entry);
- /* Temporary, changes before end of function */
- }
- memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
- del_timer(&entry->timer);
- for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) {
- if (entry != tmp &&
- !memcmp(tmp->atm_addr, atm_addr,
- ATM_ESA_LEN)) {
- /* Vcc to this host exists */
- if (tmp->status > ESI_VC_PENDING) {
- /*
- * ESI_FLUSH_PENDING,
- * ESI_FORWARD_DIRECT
- */
- entry->vcc = tmp->vcc;
- entry->old_push=tmp->old_push;
- }
- entry->status=tmp->status;
- break;
- }
- }
- }
- if (remoteflag)
- entry->flags|=LEC_REMOTE_FLAG;
- else
- entry->flags&=~LEC_REMOTE_FLAG;
- if (entry->status == ESI_ARP_PENDING ||
- entry->status == ESI_UNKNOWN) {
- entry->status = ESI_VC_PENDING;
- send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL);
- }
- DPRINTK("After update2\n");
- dump_arp_table(priv);
+ entry->status = ESI_UNKNOWN;
+ lec_arp_add(priv, entry);
+ /* Temporary, changes before end of function */
+ }
+ memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
+ del_timer(&entry->timer);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry(tmp, node, &priv->lec_arp_tables[i], next) {
+ if (entry != tmp &&
+ !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) {
+ /* Vcc to this host exists */
+ if (tmp->status > ESI_VC_PENDING) {
+ /*
+ * ESI_FLUSH_PENDING,
+ * ESI_FORWARD_DIRECT
+ */
+ entry->vcc = tmp->vcc;
+ entry->old_push = tmp->old_push;
+ }
+ entry->status = tmp->status;
+ break;
+ }
+ }
+ }
+ if (remoteflag)
+ entry->flags |= LEC_REMOTE_FLAG;
+ else
+ entry->flags &= ~LEC_REMOTE_FLAG;
+ if (entry->status == ESI_ARP_PENDING || entry->status == ESI_UNKNOWN) {
+ entry->status = ESI_VC_PENDING;
+ send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL);
+ }
+ DPRINTK("After update2\n");
+ dump_arp_table(priv);
out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
@@ -2219,299 +2183,299 @@ out:
*/
static void
lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
- struct atm_vcc *vcc,
- void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb))
+ struct atm_vcc *vcc,
+ void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb))
{
unsigned long flags;
- struct lec_arp_table *entry;
- int i, found_entry=0;
+ struct hlist_node *node;
+ struct lec_arp_table *entry;
+ int i, found_entry = 0;
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- if (ioc_data->receive == 2) {
- /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */
+ if (ioc_data->receive == 2) {
+ /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */
- DPRINTK("LEC_ARP: Attaching mcast forward\n");
+ DPRINTK("LEC_ARP: Attaching mcast forward\n");
#if 0
- entry = lec_arp_find(priv, bus_mac);
- if (!entry) {
- printk("LEC_ARP: Multicast entry not found!\n");
+ entry = lec_arp_find(priv, bus_mac);
+ if (!entry) {
+ printk("LEC_ARP: Multicast entry not found!\n");
goto out;
- }
- memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- entry->recv_vcc = vcc;
- entry->old_recv_push = old_push;
+ }
+ memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+ entry->recv_vcc = vcc;
+ entry->old_recv_push = old_push;
#endif
- entry = make_entry(priv, bus_mac);
- if (entry == NULL)
+ entry = make_entry(priv, bus_mac);
+ if (entry == NULL)
goto out;
- del_timer(&entry->timer);
- memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- entry->recv_vcc = vcc;
- entry->old_recv_push = old_push;
- entry->next = priv->mcast_fwds;
- priv->mcast_fwds = entry;
- goto out;
- } else if (ioc_data->receive == 1) {
- /* Vcc which we don't want to make default vcc, attach it
- anyway. */
- DPRINTK("LEC_ARP:Attaching data direct, not default :%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
- ioc_data->atm_addr[0],ioc_data->atm_addr[1],
- ioc_data->atm_addr[2],ioc_data->atm_addr[3],
- ioc_data->atm_addr[4],ioc_data->atm_addr[5],
- ioc_data->atm_addr[6],ioc_data->atm_addr[7],
- ioc_data->atm_addr[8],ioc_data->atm_addr[9],
- ioc_data->atm_addr[10],ioc_data->atm_addr[11],
- ioc_data->atm_addr[12],ioc_data->atm_addr[13],
- ioc_data->atm_addr[14],ioc_data->atm_addr[15],
- ioc_data->atm_addr[16],ioc_data->atm_addr[17],
- ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
- entry = make_entry(priv, bus_mac);
- if (entry == NULL)
+ del_timer(&entry->timer);
+ memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+ entry->recv_vcc = vcc;
+ entry->old_recv_push = old_push;
+ hlist_add_head(&entry->next, &priv->mcast_fwds);
+ goto out;
+ } else if (ioc_data->receive == 1) {
+ /*
+ * Vcc which we don't want to make default vcc,
+ * attach it anyway.
+ */
+ DPRINTK
+ ("LEC_ARP:Attaching data direct, not default: "
+ "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+ ioc_data->atm_addr[0], ioc_data->atm_addr[1],
+ ioc_data->atm_addr[2], ioc_data->atm_addr[3],
+ ioc_data->atm_addr[4], ioc_data->atm_addr[5],
+ ioc_data->atm_addr[6], ioc_data->atm_addr[7],
+ ioc_data->atm_addr[8], ioc_data->atm_addr[9],
+ ioc_data->atm_addr[10], ioc_data->atm_addr[11],
+ ioc_data->atm_addr[12], ioc_data->atm_addr[13],
+ ioc_data->atm_addr[14], ioc_data->atm_addr[15],
+ ioc_data->atm_addr[16], ioc_data->atm_addr[17],
+ ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
+ entry = make_entry(priv, bus_mac);
+ if (entry == NULL)
goto out;
- memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- memset(entry->mac_addr, 0, ETH_ALEN);
- entry->recv_vcc = vcc;
- entry->old_recv_push = old_push;
- entry->status = ESI_UNKNOWN;
- entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
- add_timer(&entry->timer);
- entry->next = priv->lec_no_forward;
- priv->lec_no_forward = entry;
+ memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+ memset(entry->mac_addr, 0, ETH_ALEN);
+ entry->recv_vcc = vcc;
+ entry->old_recv_push = old_push;
+ entry->status = ESI_UNKNOWN;
+ entry->timer.expires = jiffies + priv->vcc_timeout_period;
+ entry->timer.function = lec_arp_expire_vcc;
+ hlist_add_head(&entry->next, &priv->lec_no_forward);
+ add_timer(&entry->timer);
dump_arp_table(priv);
goto out;
- }
- DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
- ioc_data->atm_addr[0],ioc_data->atm_addr[1],
- ioc_data->atm_addr[2],ioc_data->atm_addr[3],
- ioc_data->atm_addr[4],ioc_data->atm_addr[5],
- ioc_data->atm_addr[6],ioc_data->atm_addr[7],
- ioc_data->atm_addr[8],ioc_data->atm_addr[9],
- ioc_data->atm_addr[10],ioc_data->atm_addr[11],
- ioc_data->atm_addr[12],ioc_data->atm_addr[13],
- ioc_data->atm_addr[14],ioc_data->atm_addr[15],
- ioc_data->atm_addr[16],ioc_data->atm_addr[17],
- ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
- for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
- if (memcmp(ioc_data->atm_addr, entry->atm_addr,
- ATM_ESA_LEN)==0) {
- DPRINTK("LEC_ARP: Attaching data direct\n");
- DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n",
- entry->vcc?entry->vcc->vci:0,
- entry->recv_vcc?entry->recv_vcc->vci:0);
- found_entry=1;
- del_timer(&entry->timer);
- entry->vcc = vcc;
- entry->old_push = old_push;
- if (entry->status == ESI_VC_PENDING) {
- if(priv->maximum_unknown_frame_count
- ==0)
- entry->status =
- ESI_FORWARD_DIRECT;
- else {
- entry->timestamp = jiffies;
- entry->status =
- ESI_FLUSH_PENDING;
+ }
+ DPRINTK
+ ("LEC_ARP:Attaching data direct, default: "
+ "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+ ioc_data->atm_addr[0], ioc_data->atm_addr[1],
+ ioc_data->atm_addr[2], ioc_data->atm_addr[3],
+ ioc_data->atm_addr[4], ioc_data->atm_addr[5],
+ ioc_data->atm_addr[6], ioc_data->atm_addr[7],
+ ioc_data->atm_addr[8], ioc_data->atm_addr[9],
+ ioc_data->atm_addr[10], ioc_data->atm_addr[11],
+ ioc_data->atm_addr[12], ioc_data->atm_addr[13],
+ ioc_data->atm_addr[14], ioc_data->atm_addr[15],
+ ioc_data->atm_addr[16], ioc_data->atm_addr[17],
+ ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) {
+ if (memcmp
+ (ioc_data->atm_addr, entry->atm_addr,
+ ATM_ESA_LEN) == 0) {
+ DPRINTK("LEC_ARP: Attaching data direct\n");
+ DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n",
+ entry->vcc ? entry->vcc->vci : 0,
+ entry->recv_vcc ? entry->recv_vcc->
+ vci : 0);
+ found_entry = 1;
+ del_timer(&entry->timer);
+ entry->vcc = vcc;
+ entry->old_push = old_push;
+ if (entry->status == ESI_VC_PENDING) {
+ if (priv->maximum_unknown_frame_count
+ == 0)
+ entry->status =
+ ESI_FORWARD_DIRECT;
+ else {
+ entry->timestamp = jiffies;
+ entry->status =
+ ESI_FLUSH_PENDING;
#if 0
- send_to_lecd(priv,l_flush_xmt,
- NULL,
- entry->atm_addr,
- NULL);
+ send_to_lecd(priv, l_flush_xmt,
+ NULL,
+ entry->atm_addr,
+ NULL);
#endif
- }
- } else {
- /* They were forming a connection
- to us, and we to them. Our
- ATM address is numerically lower
- than theirs, so we make connection
- we formed into default VCC (8.1.11).
- Connection they made gets torn
- down. This might confuse some
- clients. Can be changed if
- someone reports trouble... */
- ;
- }
- }
- }
- }
- if (found_entry) {
- DPRINTK("After vcc was added\n");
- dump_arp_table(priv);
+ }
+ } else {
+ /*
+ * They were forming a connection
+ * to us, and we to them. Our
+ * ATM address is numerically lower
+ * than theirs, so we make connection
+ * we formed into default VCC (8.1.11).
+ * Connection they made gets torn
+ * down. This might confuse some
+ * clients. Can be changed if
+ * someone reports trouble...
+ */
+ ;
+ }
+ }
+ }
+ }
+ if (found_entry) {
+ DPRINTK("After vcc was added\n");
+ dump_arp_table(priv);
goto out;
- }
- /* Not found, snatch address from first data packet that arrives from
- this vcc */
- entry = make_entry(priv, bus_mac);
- if (!entry)
+ }
+ /*
+ * Not found, snatch address from first data packet that arrives
+ * from this vcc
+ */
+ entry = make_entry(priv, bus_mac);
+ if (!entry)
goto out;
- entry->vcc = vcc;
- entry->old_push = old_push;
- memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- memset(entry->mac_addr, 0, ETH_ALEN);
- entry->status = ESI_UNKNOWN;
- entry->next = priv->lec_arp_empty_ones;
- priv->lec_arp_empty_ones = entry;
- entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
- add_timer(&entry->timer);
- DPRINTK("After vcc was added\n");
+ entry->vcc = vcc;
+ entry->old_push = old_push;
+ memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+ memset(entry->mac_addr, 0, ETH_ALEN);
+ entry->status = ESI_UNKNOWN;
+ hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
+ entry->timer.expires = jiffies + priv->vcc_timeout_period;
+ entry->timer.function = lec_arp_expire_vcc;
+ add_timer(&entry->timer);
+ DPRINTK("After vcc was added\n");
dump_arp_table(priv);
out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
-static void
-lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
+static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
{
unsigned long flags;
- struct lec_arp_table *entry;
- int i;
-
- DPRINTK("LEC:lec_flush_complete %lx\n",tran_id);
+ struct hlist_node *node;
+ struct lec_arp_table *entry;
+ int i;
+
+ DPRINTK("LEC:lec_flush_complete %lx\n", tran_id);
+restart:
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
- for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
- if (entry->flush_tran_id == tran_id &&
- entry->status == ESI_FLUSH_PENDING) {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
- lec_send(entry->vcc, skb, entry->priv);
- entry->status = ESI_FORWARD_DIRECT;
- DPRINTK("LEC_ARP: Flushed\n");
- }
- }
- }
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) {
+ if (entry->flush_tran_id == tran_id
+ && entry->status == ESI_FLUSH_PENDING) {
+ struct sk_buff *skb;
+ struct atm_vcc *vcc = entry->vcc;
+
+ lec_arp_hold(entry);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+ while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
+ lec_send(vcc, skb, entry->priv);
+ entry->last_used = jiffies;
+ entry->status = ESI_FORWARD_DIRECT;
+ lec_arp_put(entry);
+ DPRINTK("LEC_ARP: Flushed\n");
+ goto restart;
+ }
+ }
+ }
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- dump_arp_table(priv);
+ dump_arp_table(priv);
}
static void
lec_set_flush_tran_id(struct lec_priv *priv,
- unsigned char *atm_addr, unsigned long tran_id)
+ unsigned char *atm_addr, unsigned long tran_id)
{
unsigned long flags;
- struct lec_arp_table *entry;
- int i;
+ struct hlist_node *node;
+ struct lec_arp_table *entry;
+ int i;
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
- for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next)
- if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
- entry->flush_tran_id = tran_id;
- DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry);
- }
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
+ hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) {
+ if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
+ entry->flush_tran_id = tran_id;
+ DPRINTK("Set flush transaction id to %lx for %p\n",
+ tran_id, entry);
+ }
+ }
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
-static int
-lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
+static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
{
unsigned long flags;
- unsigned char mac_addr[] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
- struct lec_arp_table *to_add;
+ unsigned char mac_addr[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ };
+ struct lec_arp_table *to_add;
struct lec_vcc_priv *vpriv;
int err = 0;
-
+
if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL)))
return -ENOMEM;
vpriv->xoff = 0;
vpriv->old_pop = vcc->pop;
vcc->user_back = vpriv;
- vcc->pop = lec_pop;
+ vcc->pop = lec_pop;
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- to_add = make_entry(priv, mac_addr);
- if (!to_add) {
+ to_add = make_entry(priv, mac_addr);
+ if (!to_add) {
vcc->pop = vpriv->old_pop;
kfree(vpriv);
- err = -ENOMEM;
+ err = -ENOMEM;
goto out;
- }
- memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN);
- to_add->status = ESI_FORWARD_DIRECT;
- to_add->flags |= LEC_PERMANENT_FLAG;
- to_add->vcc = vcc;
- to_add->old_push = vcc->push;
- vcc->push = lec_push;
- priv->mcast_vcc = vcc;
- lec_arp_add(priv, to_add);
+ }
+ memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN);
+ to_add->status = ESI_FORWARD_DIRECT;
+ to_add->flags |= LEC_PERMANENT_FLAG;
+ to_add->vcc = vcc;
+ to_add->old_push = vcc->push;
+ vcc->push = lec_push;
+ priv->mcast_vcc = vcc;
+ lec_arp_add(priv, to_add);
out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- return err;
+ return err;
}
-static void
-lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
+static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
{
unsigned long flags;
- struct lec_arp_table *entry, *next;
- int i;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry;
+ int i;
+
+ DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci);
+ dump_arp_table(priv);
- DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci);
- dump_arp_table(priv);
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for(entry = priv->lec_arp_tables[i];entry; entry=next) {
- next = entry->next;
- if (vcc == entry->vcc) {
- lec_arp_remove(priv, entry);
- kfree(entry);
- if (priv->mcast_vcc == vcc) {
- priv->mcast_vcc = NULL;
- }
- }
- }
- }
-
- entry = priv->lec_arp_empty_ones;
- priv->lec_arp_empty_ones = NULL;
- while (entry != NULL) {
- next = entry->next;
- if (entry->vcc == vcc) { /* leave it out from the list */
- lec_arp_clear_vccs(entry);
- del_timer(&entry->timer);
- kfree(entry);
- }
- else { /* put it back to the list */
- entry->next = priv->lec_arp_empty_ones;
- priv->lec_arp_empty_ones = entry;
- }
- entry = next;
- }
-
- entry = priv->lec_no_forward;
- priv->lec_no_forward = NULL;
- while (entry != NULL) {
- next = entry->next;
- if (entry->recv_vcc == vcc) {
- lec_arp_clear_vccs(entry);
- del_timer(&entry->timer);
- kfree(entry);
- }
- else {
- entry->next = priv->lec_no_forward;
- priv->lec_no_forward = entry;
- }
- entry = next;
- }
-
- entry = priv->mcast_fwds;
- priv->mcast_fwds = NULL;
- while (entry != NULL) {
- next = entry->next;
- if (entry->recv_vcc == vcc) {
- lec_arp_clear_vccs(entry);
- /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
- kfree(entry);
- }
- else {
- entry->next = priv->mcast_fwds;
- priv->mcast_fwds = entry;
- }
- entry = next;
- }
+
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) {
+ if (vcc == entry->vcc) {
+ lec_arp_remove(priv, entry);
+ lec_arp_put(entry);
+ if (priv->mcast_vcc == vcc) {
+ priv->mcast_vcc = NULL;
+ }
+ }
+ }
+ }
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) {
+ if (entry->vcc == vcc) {
+ lec_arp_clear_vccs(entry);
+ del_timer(&entry->timer);
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ }
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) {
+ if (entry->recv_vcc == vcc) {
+ lec_arp_clear_vccs(entry);
+ del_timer(&entry->timer);
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ }
+
+ hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+ if (entry->recv_vcc == vcc) {
+ lec_arp_clear_vccs(entry);
+ /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
+ hlist_del(&entry->next);
+ lec_arp_put(entry);
+ }
+ }
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
dump_arp_table(priv);
@@ -2519,57 +2483,42 @@ lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
static void
lec_arp_check_empties(struct lec_priv *priv,
- struct atm_vcc *vcc, struct sk_buff *skb)
+ struct atm_vcc *vcc, struct sk_buff *skb)
{
- unsigned long flags;
- struct lec_arp_table *entry, *prev;
- struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data;
- unsigned char *src;
+ unsigned long flags;
+ struct hlist_node *node, *next;
+ struct lec_arp_table *entry, *tmp;
+ struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data;
+ unsigned char *src;
#ifdef CONFIG_TR
- struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data;
+ struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data;
- if (priv->is_trdev) src = tr_hdr->h_source;
- else
+ if (priv->is_trdev)
+ src = tr_hdr->h_source;
+ else
#endif
- src = hdr->h_source;
+ src = hdr->h_source;
spin_lock_irqsave(&priv->lec_arp_lock, flags);
- entry = priv->lec_arp_empty_ones;
- if (vcc == entry->vcc) {
- del_timer(&entry->timer);
- memcpy(entry->mac_addr, src, ETH_ALEN);
- entry->status = ESI_FORWARD_DIRECT;
- entry->last_used = jiffies;
- priv->lec_arp_empty_ones = entry->next;
- /* We might have got an entry */
- if ((prev = lec_arp_find(priv,src))) {
- lec_arp_remove(priv, prev);
- kfree(prev);
- }
- lec_arp_add(priv, entry);
- goto out;
- }
- prev = entry;
- entry = entry->next;
- while (entry && entry->vcc != vcc) {
- prev= entry;
- entry = entry->next;
- }
- if (!entry) {
- DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n");
- goto out;
- }
- del_timer(&entry->timer);
- memcpy(entry->mac_addr, src, ETH_ALEN);
- entry->status = ESI_FORWARD_DIRECT;
- entry->last_used = jiffies;
- prev->next = entry->next;
- if ((prev = lec_arp_find(priv, src))) {
- lec_arp_remove(priv, prev);
- kfree(prev);
- }
- lec_arp_add(priv, entry);
+ hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) {
+ if (vcc == entry->vcc) {
+ del_timer(&entry->timer);
+ memcpy(entry->mac_addr, src, ETH_ALEN);
+ entry->status = ESI_FORWARD_DIRECT;
+ entry->last_used = jiffies;
+ /* We might have got an entry */
+ if ((tmp = lec_arp_find(priv, src))) {
+ lec_arp_remove(priv, tmp);
+ lec_arp_put(tmp);
+ }
+ hlist_del(&entry->next);
+ lec_arp_add(priv, entry);
+ goto out;
+ }
+ }
+ DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n");
out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
+
MODULE_LICENSE("GPL");
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 6606082b29a8..8ac6b7321635 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,9 +1,7 @@
/*
- *
* Lan Emulation client header file
*
- * Marko Kiiskila mkiiskila@yahoo.com
- *
+ * Marko Kiiskila <mkiiskila@yahoo.com>
*/
#ifndef _LEC_H_
@@ -17,18 +15,18 @@
#define LEC_HEADER_LEN 16
struct lecdatahdr_8023 {
- unsigned short le_header;
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- unsigned short h_type;
+ unsigned short le_header;
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_type;
};
struct lecdatahdr_8025 {
- unsigned short le_header;
- unsigned char ac_pad;
- unsigned char fc;
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
+ unsigned short le_header;
+ unsigned char ac_pad;
+ unsigned char fc;
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
};
#define LEC_MINIMUM_8023_SIZE 62
@@ -45,17 +43,18 @@ struct lecdatahdr_8025 {
*
*/
struct lane2_ops {
- int (*resolve)(struct net_device *dev, u8 *dst_mac, int force,
- u8 **tlvs, u32 *sizeoftlvs);
- int (*associate_req)(struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs);
- void (*associate_indicator)(struct net_device *dev, u8 *mac_addr,
- u8 *tlvs, u32 sizeoftlvs);
+ int (*resolve) (struct net_device *dev, u8 *dst_mac, int force,
+ u8 **tlvs, u32 *sizeoftlvs);
+ int (*associate_req) (struct net_device *dev, u8 *lan_dst,
+ u8 *tlvs, u32 sizeoftlvs);
+ void (*associate_indicator) (struct net_device *dev, u8 *mac_addr,
+ u8 *tlvs, u32 sizeoftlvs);
};
/*
* ATM LAN Emulation supports both LLC & Dix Ethernet EtherType
* frames.
+ *
* 1. Dix Ethernet EtherType frames encoded by placing EtherType
* field in h_type field. Data follows immediatelly after header.
* 2. LLC Data frames whose total length, including LLC field and data,
@@ -71,72 +70,88 @@ struct lane2_ops {
#define LEC_ARP_TABLE_SIZE 16
struct lec_priv {
- struct net_device_stats stats;
- unsigned short lecid; /* Lecid of this client */
- struct lec_arp_table *lec_arp_empty_ones;
- /* Used for storing VCC's that don't have a MAC address attached yet */
- struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE];
- /* Actual LE ARP table */
- struct lec_arp_table *lec_no_forward;
- /* Used for storing VCC's (and forward packets from) which are to
- age out by not using them to forward packets.
- This is because to some LE clients there will be 2 VCCs. Only
- one of them gets used. */
- struct lec_arp_table *mcast_fwds;
- /* With LANEv2 it is possible that BUS (or a special multicast server)
- establishes multiple Multicast Forward VCCs to us. This list
- collects all those VCCs. LANEv1 client has only one item in this
- list. These entries are not aged out. */
- spinlock_t lec_arp_lock;
- struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
- struct atm_vcc *lecd;
- struct timer_list lec_arp_timer;
- /* C10 */
- unsigned int maximum_unknown_frame_count;
-/* Within the period of time defined by this variable, the client will send
- no more than C10 frames to BUS for a given unicast destination. (C11) */
- unsigned long max_unknown_frame_time;
-/* If no traffic has been sent in this vcc for this period of time,
- vcc will be torn down (C12)*/
- unsigned long vcc_timeout_period;
-/* An LE Client MUST not retry an LE_ARP_REQUEST for a
- given frame's LAN Destination more than maximum retry count times,
- after the first LEC_ARP_REQUEST (C13)*/
- unsigned short max_retry_count;
-/* Max time the client will maintain an entry in its arp cache in
- absence of a verification of that relationship (C17)*/
- unsigned long aging_time;
-/* Max time the client will maintain an entry in cache when
- topology change flag is true (C18) */
- unsigned long forward_delay_time;
-/* Topology change flag (C19)*/
- int topology_change;
-/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE
- cycle to take (C20)*/
- unsigned long arp_response_time;
-/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the
- LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/
- unsigned long flush_timeout;
-/* The time since sending a frame to the bus after which the
- LE Client may assume that the frame has been either discarded or
- delivered to the recipient (C22) */
- unsigned long path_switching_delay;
+ struct net_device_stats stats;
+ unsigned short lecid; /* Lecid of this client */
+ struct hlist_head lec_arp_empty_ones;
+ /* Used for storing VCC's that don't have a MAC address attached yet */
+ struct hlist_head lec_arp_tables[LEC_ARP_TABLE_SIZE];
+ /* Actual LE ARP table */
+ struct hlist_head lec_no_forward;
+ /*
+ * Used for storing VCC's (and forward packets from) which are to
+ * age out by not using them to forward packets.
+ * This is because to some LE clients there will be 2 VCCs. Only
+ * one of them gets used.
+ */
+ struct hlist_head mcast_fwds;
+ /*
+ * With LANEv2 it is possible that BUS (or a special multicast server)
+ * establishes multiple Multicast Forward VCCs to us. This list
+ * collects all those VCCs. LANEv1 client has only one item in this
+ * list. These entries are not aged out.
+ */
+ spinlock_t lec_arp_lock;
+ struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
+ struct atm_vcc *lecd;
+ struct work_struct lec_arp_work; /* C10 */
+ unsigned int maximum_unknown_frame_count;
+ /*
+ * Within the period of time defined by this variable, the client will send
+ * no more than C10 frames to BUS for a given unicast destination. (C11)
+ */
+ unsigned long max_unknown_frame_time;
+ /*
+ * If no traffic has been sent in this vcc for this period of time,
+ * vcc will be torn down (C12)
+ */
+ unsigned long vcc_timeout_period;
+ /*
+ * An LE Client MUST not retry an LE_ARP_REQUEST for a
+ * given frame's LAN Destination more than maximum retry count times,
+ * after the first LEC_ARP_REQUEST (C13)
+ */
+ unsigned short max_retry_count;
+ /*
+ * Max time the client will maintain an entry in its arp cache in
+ * absence of a verification of that relationship (C17)
+ */
+ unsigned long aging_time;
+ /*
+ * Max time the client will maintain an entry in cache when
+ * topology change flag is true (C18)
+ */
+ unsigned long forward_delay_time; /* Topology change flag (C19) */
+ int topology_change;
+ /*
+ * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE
+ * cycle to take (C20)
+ */
+ unsigned long arp_response_time;
+ /*
+ * Time limit ot wait to receive an LE_FLUSH_RESPONSE after the
+ * LE_FLUSH_REQUEST has been sent before taking recover action. (C21)
+ */
+ unsigned long flush_timeout;
+ /* The time since sending a frame to the bus after which the
+ * LE Client may assume that the frame has been either discarded or
+ * delivered to the recipient (C22)
+ */
+ unsigned long path_switching_delay;
- u8 *tlvs; /* LANE2: TLVs are new */
- u32 sizeoftlvs; /* The size of the tlv array in bytes */
- int lane_version; /* LANE2 */
- int itfnum; /* e.g. 2 for lec2, 5 for lec5 */
- struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */
- int is_proxy; /* bridge between ATM and Ethernet */
- int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */
+ u8 *tlvs; /* LANE2: TLVs are new */
+ u32 sizeoftlvs; /* The size of the tlv array in bytes */
+ int lane_version; /* LANE2 */
+ int itfnum; /* e.g. 2 for lec2, 5 for lec5 */
+ struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */
+ int is_proxy; /* bridge between ATM and Ethernet */
+ int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */
};
struct lec_vcc_priv {
- void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
+ void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb);
int xoff;
};
#define LEC_VCC_PRIV(vcc) ((struct lec_vcc_priv *)((vcc)->user_back))
-#endif /* _LEC_H_ */
-
+#endif /* _LEC_H_ */
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index 397448094648..ec67435a40a6 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,92 +1,96 @@
/*
* Lec arp cache
- * Marko Kiiskila mkiiskila@yahoo.com
*
+ * Marko Kiiskila <mkiiskila@yahoo.com>
*/
-#ifndef _LEC_ARP_H
-#define _LEC_ARP_H
+#ifndef _LEC_ARP_H_
+#define _LEC_ARP_H_
#include <linux/atm.h>
#include <linux/atmdev.h>
#include <linux/if_ether.h>
#include <linux/atmlec.h>
struct lec_arp_table {
- struct lec_arp_table *next; /* Linked entry list */
- unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */
- unsigned char mac_addr[ETH_ALEN]; /* Mac address */
- int is_rdesc; /* Mac address is a route descriptor */
- struct atm_vcc *vcc; /* Vcc this entry is attached */
- struct atm_vcc *recv_vcc; /* Vcc we receive data from */
- void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb);
- /* Push that leads to daemon */
- void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb);
- /* Push that leads to daemon */
- void (*old_close)(struct atm_vcc *vcc);
- /* We want to see when this
- * vcc gets closed */
- unsigned long last_used; /* For expiry */
- unsigned long timestamp; /* Used for various timestamping
- * things:
- * 1. FLUSH started
- * (status=ESI_FLUSH_PENDING)
- * 2. Counting to
- * max_unknown_frame_time
- * (status=ESI_ARP_PENDING||
- * status=ESI_VC_PENDING)
- */
- unsigned char no_tries; /* No of times arp retry has been
- tried */
- unsigned char status; /* Status of this entry */
- unsigned short flags; /* Flags for this entry */
- unsigned short packets_flooded; /* Data packets flooded */
- unsigned long flush_tran_id; /* Transaction id in flush protocol */
- struct timer_list timer; /* Arping timer */
- struct lec_priv *priv; /* Pointer back */
+ struct hlist_node next; /* Linked entry list */
+ unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */
+ unsigned char mac_addr[ETH_ALEN]; /* Mac address */
+ int is_rdesc; /* Mac address is a route descriptor */
+ struct atm_vcc *vcc; /* Vcc this entry is attached */
+ struct atm_vcc *recv_vcc; /* Vcc we receive data from */
- u8 *tlvs; /* LANE2: Each MAC address can have TLVs */
- u32 sizeoftlvs; /* associated with it. sizeoftlvs tells the */
- /* the length of the tlvs array */
- struct sk_buff_head tx_wait; /* wait queue for outgoing packets */
+ void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb);
+ /* Push that leads to daemon */
+
+ void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb);
+ /* Push that leads to daemon */
+
+ unsigned long last_used; /* For expiry */
+ unsigned long timestamp; /* Used for various timestamping things:
+ * 1. FLUSH started
+ * (status=ESI_FLUSH_PENDING)
+ * 2. Counting to
+ * max_unknown_frame_time
+ * (status=ESI_ARP_PENDING||
+ * status=ESI_VC_PENDING)
+ */
+ unsigned char no_tries; /* No of times arp retry has been tried */
+ unsigned char status; /* Status of this entry */
+ unsigned short flags; /* Flags for this entry */
+ unsigned short packets_flooded; /* Data packets flooded */
+ unsigned long flush_tran_id; /* Transaction id in flush protocol */
+ struct timer_list timer; /* Arping timer */
+ struct lec_priv *priv; /* Pointer back */
+ u8 *tlvs;
+ u32 sizeoftlvs; /*
+ * LANE2: Each MAC address can have TLVs
+ * associated with it. sizeoftlvs tells the
+ * the length of the tlvs array
+ */
+ struct sk_buff_head tx_wait; /* wait queue for outgoing packets */
+ atomic_t usage; /* usage count */
};
-struct tlv { /* LANE2: Template tlv struct for accessing */
- /* the tlvs in the lec_arp_table->tlvs array*/
- u32 type;
- u8 length;
- u8 value[255];
+/*
+ * LANE2: Template tlv struct for accessing
+ * the tlvs in the lec_arp_table->tlvs array
+ */
+struct tlv {
+ u32 type;
+ u8 length;
+ u8 value[255];
};
/* Status fields */
-#define ESI_UNKNOWN 0 /*
- * Next packet sent to this mac address
- * causes ARP-request to be sent
- */
-#define ESI_ARP_PENDING 1 /*
- * There is no ATM address associated with this
- * 48-bit address. The LE-ARP protocol is in
- * progress.
- */
-#define ESI_VC_PENDING 2 /*
- * There is a valid ATM address associated with
- * this 48-bit address but there is no VC set
- * up to that ATM address. The signaling
- * protocol is in process.
- */
-#define ESI_FLUSH_PENDING 4 /*
- * The LEC has been notified of the FLUSH_START
- * status and it is assumed that the flush
- * protocol is in process.
- */
-#define ESI_FORWARD_DIRECT 5 /*
- * Either the Path Switching Delay (C22) has
- * elapsed or the LEC has notified the Mapping
- * that the flush protocol has completed. In
- * either case, it is safe to forward packets
- * to this address via the data direct VC.
- */
+#define ESI_UNKNOWN 0 /*
+ * Next packet sent to this mac address
+ * causes ARP-request to be sent
+ */
+#define ESI_ARP_PENDING 1 /*
+ * There is no ATM address associated with this
+ * 48-bit address. The LE-ARP protocol is in
+ * progress.
+ */
+#define ESI_VC_PENDING 2 /*
+ * There is a valid ATM address associated with
+ * this 48-bit address but there is no VC set
+ * up to that ATM address. The signaling
+ * protocol is in process.
+ */
+#define ESI_FLUSH_PENDING 4 /*
+ * The LEC has been notified of the FLUSH_START
+ * status and it is assumed that the flush
+ * protocol is in process.
+ */
+#define ESI_FORWARD_DIRECT 5 /*
+ * Either the Path Switching Delay (C22) has
+ * elapsed or the LEC has notified the Mapping
+ * that the flush protocol has completed. In
+ * either case, it is safe to forward packets
+ * to this address via the data direct VC.
+ */
/* Flag values */
#define LEC_REMOTE_FLAG 0x0001
#define LEC_PERMANENT_FLAG 0x0002
-#endif
+#endif /* _LEC_ARP_H_ */
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a48a5d580408..0d2b994af511 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -25,7 +25,6 @@
#include <linux/atmlec.h>
#include <linux/atmmpc.h>
/* Modular too */
-#include <linux/config.h>
#include <linux/module.h>
#include "lec.h"
@@ -99,11 +98,6 @@ static struct notifier_block mpoa_notifier = {
0
};
-#ifdef CONFIG_PROC_FS
-extern int mpc_proc_init(void);
-extern void mpc_proc_clean(void);
-#endif
-
struct mpoa_client *mpcs = NULL; /* FIXME */
static struct atm_mpoa_qos *qos_head = NULL;
static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
@@ -259,10 +253,9 @@ static struct mpoa_client *alloc_mpc(void)
{
struct mpoa_client *mpc;
- mpc = kmalloc(sizeof (struct mpoa_client), GFP_KERNEL);
+ mpc = kzalloc(sizeof (struct mpoa_client), GFP_KERNEL);
if (mpc == NULL)
return NULL;
- memset(mpc, 0, sizeof(struct mpoa_client));
rwlock_init(&mpc->ingress_lock);
rwlock_init(&mpc->egress_lock);
mpc->next = mpcs;
@@ -567,7 +560,6 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
struct atmmpc_ioc ioc_data;
in_cache_entry *in_entry;
uint32_t ipaddr;
- unsigned char *ip;
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc));
if (bytes_left != 0) {
@@ -590,9 +582,8 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
if (in_entry != NULL) mpc->in_ops->put(in_entry);
return -EINVAL;
}
- ip = (unsigned char*)&in_entry->ctrl_info.in_dst_ip;
printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %u.%u.%u.%u\n",
- mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+ mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip));
in_entry->shortcut = vcc;
mpc->in_ops->put(in_entry);
} else {
@@ -623,10 +614,8 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev)
dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name);
in_entry = mpc->in_ops->get_by_vcc(vcc, mpc);
if (in_entry) {
- unsigned char *ip __attribute__ ((unused)) =
- (unsigned char *)&in_entry->ctrl_info.in_dst_ip;
dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %u.%u.%u.%u\n",
- mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+ mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip));
in_entry->shortcut = NULL;
mpc->in_ops->put(in_entry);
}
@@ -1113,10 +1102,9 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien
static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
- unsigned char *ip;
-
uint32_t dst_ip = msg->content.in_info.in_dst_ip;
in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc);
+
dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip));
ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry);
if(entry == NULL){
@@ -1162,18 +1150,17 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
uint32_t dst_ip = msg->content.in_info.in_dst_ip;
uint32_t mask = msg->ip_mask;
- unsigned char *ip = (unsigned char *)&dst_ip;
in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
if(entry == NULL){
printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ", mpc->dev->name);
- printk("ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]);
+ printk("ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip));
return;
}
do {
dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %u.%u.%u.%u\n" ,
- mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+ mpc->dev->name, NIPQUAD(dst_ip));
write_lock_bh(&mpc->ingress_lock);
mpc->in_ops->remove_entry(entry, mpc);
write_unlock_bh(&mpc->ingress_lock);
@@ -1442,12 +1429,8 @@ static __init int atm_mpoa_init(void)
{
register_atm_ioctl(&atm_ioctl_ops);
-#ifdef CONFIG_PROC_FS
if (mpc_proc_init() != 0)
printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n");
- else
- printk(KERN_INFO "mpoa: /proc/mpoa initialized\n");
-#endif
printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
@@ -1460,9 +1443,7 @@ static void __exit atm_mpoa_cleanup(void)
struct atm_mpoa_qos *qos, *nextqos;
struct lec_priv *priv;
-#ifdef CONFIG_PROC_FS
mpc_proc_clean();
-#endif
del_timer(&mpc_timer);
unregister_netdevice_notifier(&mpoa_notifier);
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 863ddf6079e1..3c7981a229e8 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos);
struct seq_file;
void atm_mpoa_disp_qos(struct seq_file *m);
+#ifdef CONFIG_PROC_FS
+int mpc_proc_init(void);
+void mpc_proc_clean(void);
+#else
+#define mpc_proc_init() (0)
+#define mpc_proc_clean() do { } while(0)
+#endif
+
#endif /* _MPC_H_ */
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 781ed1b9329d..fbf13cdcf46e 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -87,7 +87,6 @@ static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc,
static in_cache_entry *in_cache_add_entry(uint32_t dst_ip,
struct mpoa_client *client)
{
- unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip;
in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL);
if (entry == NULL) {
@@ -95,7 +94,7 @@ static in_cache_entry *in_cache_add_entry(uint32_t dst_ip,
return NULL;
}
- dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]);
+ dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip));
memset(entry,0,sizeof(in_cache_entry));
atomic_set(&entry->use, 1);
@@ -152,10 +151,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
if( entry->count > mpc->parameters.mpc_p1 &&
entry->entry_state == INGRESS_INVALID){
- unsigned char *ip __attribute__ ((unused)) =
- (unsigned char *)&entry->ctrl_info.in_dst_ip;
-
- dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+ dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, NIPQUAD(entry->ctrl_info.in_dst_ip));
entry->entry_state = INGRESS_RESOLVING;
msg.type = SND_MPOA_RES_RQST;
memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN );
@@ -187,11 +183,9 @@ static void in_cache_remove_entry(in_cache_entry *entry,
{
struct atm_vcc *vcc;
struct k_message msg;
- unsigned char *ip;
vcc = entry->shortcut;
- ip = (unsigned char *)&entry->ctrl_info.in_dst_ip;
- dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",ip[0], ip[1], ip[2], ip[3]);
+ dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",NIPQUAD(entry->ctrl_info.in_dst_ip));
if (entry->prev != NULL)
entry->prev->next = entry->next;
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 60834b5a14d6..d37b8911b3ab 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -1,4 +1,3 @@
-#include <linux/config.h>
#ifdef CONFIG_PROC_FS
#include <linux/errno.h>
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 1489067c1e84..19d5dfc0702f 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -34,7 +34,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/atm.h>
@@ -288,10 +287,9 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
if (be.encaps != PPPOATM_ENCAPS_AUTODETECT &&
be.encaps != PPPOATM_ENCAPS_VC && be.encaps != PPPOATM_ENCAPS_LLC)
return -EINVAL;
- pvcc = kmalloc(sizeof(*pvcc), GFP_KERNEL);
+ pvcc = kzalloc(sizeof(*pvcc), GFP_KERNEL);
if (pvcc == NULL)
return -ENOMEM;
- memset(pvcc, 0, sizeof(*pvcc));
pvcc->atmvcc = atmvcc;
pvcc->old_push = atmvcc->push;
pvcc->old_pop = atmvcc->pop;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4041054e5282..91fe5f53ff11 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -8,7 +8,6 @@
* the reader.
*/
-#include <linux/config.h>
#include <linux/module.h> /* for EXPORT_SYMBOL */
#include <linux/string.h>
#include <linux/types.h>
@@ -508,7 +507,7 @@ err_out:
goto out;
}
-void __exit atm_proc_exit(void)
+void atm_proc_exit(void)
{
atm_proc_dirs_remove();
}
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index f2c541774dcd..b2148b43a426 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -3,7 +3,6 @@
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
-#include <linux/config.h>
#include <linux/net.h> /* struct socket, struct proto_ops */
#include <linux/atm.h> /* ATM stuff */
#include <linux/atmdev.h> /* ATM devices */
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 18ac80698f83..529f7e64aa2c 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -8,7 +8,6 @@
* use the default destruct function initialized by sock_init_data */
-#include <linux/config.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/atmdev.h>
@@ -34,10 +33,9 @@ static struct atm_dev *__alloc_atm_dev(const char *type)
{
struct atm_dev *dev;
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
- memset(dev, 0, sizeof(*dev));
dev->type = type;
dev->signal = ATM_PHY_SIG_UNKNOWN;
dev->link_rate = ATM_OC3_PCR;
@@ -114,14 +112,27 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
printk(KERN_ERR "atm_dev_register: "
"atm_proc_dev_register failed for dev %s\n",
type);
- mutex_unlock(&atm_dev_mutex);
- kfree(dev);
- return NULL;
+ goto out_fail;
+ }
+
+ if (atm_register_sysfs(dev) < 0) {
+ printk(KERN_ERR "atm_dev_register: "
+ "atm_register_sysfs failed for dev %s\n",
+ type);
+ atm_proc_dev_deregister(dev);
+ goto out_fail;
}
+
list_add_tail(&dev->dev_list, &atm_devs);
- mutex_unlock(&atm_dev_mutex);
+out:
+ mutex_unlock(&atm_dev_mutex);
return dev;
+
+out_fail:
+ kfree(dev);
+ dev = NULL;
+ goto out;
}
@@ -140,6 +151,7 @@ void atm_dev_deregister(struct atm_dev *dev)
mutex_unlock(&atm_dev_mutex);
atm_dev_release_vccs(dev);
+ atm_unregister_sysfs(dev);
atm_proc_dev_deregister(dev);
atm_dev_put(dev);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index ac7222fee7a8..1d004aaaeec1 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -6,7 +6,6 @@
#ifndef NET_ATM_RESOURCES_H
#define NET_ATM_RESOURCES_H
-#include <linux/config.h>
#include <linux/atmdev.h>
#include <linux/mutex.h>
@@ -43,4 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev)
#endif /* CONFIG_PROC_FS */
+int atm_register_sysfs(struct atm_dev *adev);
+void atm_unregister_sysfs(struct atm_dev *adev);
#endif
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a2e0dd047e9f..000695c48583 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -13,7 +13,6 @@
* Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl)
* Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/errno.h>
@@ -146,7 +145,7 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi,
ax25_cb *s;
struct hlist_node *node;
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(s, node, &ax25_list) {
if ((s->iamdigi && !digi) || (!s->iamdigi && digi))
continue;
@@ -155,12 +154,12 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi,
/* If device is null we match any device */
if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) {
sock_hold(s->sk);
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
return s->sk;
}
}
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
return NULL;
}
@@ -175,7 +174,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
ax25_cb *s;
struct hlist_node *node;
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(s, node, &ax25_list) {
if (s->sk && !ax25cmp(&s->source_addr, my_addr) &&
!ax25cmp(&s->dest_addr, dest_addr) &&
@@ -186,7 +185,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
}
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
return sk;
}
@@ -236,7 +235,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
struct sk_buff *copy;
struct hlist_node *node;
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(s, node, &ax25_list) {
if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 &&
s->sk->sk_type == SOCK_RAW &&
@@ -249,7 +248,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
kfree_skb(copy);
}
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
}
/*
@@ -487,10 +486,9 @@ ax25_cb *ax25_create_cb(void)
{
ax25_cb *ax25;
- if ((ax25 = kmalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL)
+ if ((ax25 = kzalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL)
return NULL;
- memset(ax25, 0x00, sizeof(*ax25));
atomic_set(&ax25->refcount, 1);
skb_queue_head_init(&ax25->write_queue);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index dab77efe34a6..b787678220ff 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -6,7 +6,6 @@
*
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -56,15 +55,13 @@ void ax25_dev_device_up(struct net_device *dev)
{
ax25_dev *ax25_dev;
- if ((ax25_dev = kmalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) {
+ if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) {
printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n");
return;
}
ax25_unregister_sysctl();
- memset(ax25_dev, 0x00, sizeof(*ax25_dev));
-
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold(dev);
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 1d4ab641f82b..4d22d4430ec8 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -80,7 +80,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25)
ax25_start_t3timer(ax25);
ax25_ds_set_timer(ax25->ax25_dev);
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(ax25o, node, &ax25_list) {
if (ax25o == ax25)
continue;
@@ -106,7 +106,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25)
if (ax25o->state != AX25_STATE_0)
ax25_start_t3timer(ax25o);
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
}
void ax25_ds_establish_data_link(ax25_cb *ax25)
@@ -162,13 +162,13 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev)
int res = 0;
struct hlist_node *node;
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(ax25, node, &ax25_list)
if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) {
res = 1;
break;
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
return res;
}
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5961459935eb..4f44185955c7 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -85,7 +85,7 @@ static void ax25_ds_timeout(unsigned long arg)
return;
}
- spin_lock_bh(&ax25_list_lock);
+ spin_lock(&ax25_list_lock);
ax25_for_each(ax25, node, &ax25_list) {
if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE))
continue;
@@ -93,7 +93,7 @@ static void ax25_ds_timeout(unsigned long arg)
ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
ax25_disconnect(ax25, ETIMEDOUT);
}
- spin_unlock_bh(&ax25_list_lock);
+ spin_unlock(&ax25_list_lock);
ax25_dev_dama_off(ax25_dev);
}
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 3bb152710b77..07ac0207eb69 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -6,7 +6,6 @@
*
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -67,10 +66,10 @@ int ax25_protocol_register(unsigned int pid,
protocol->pid = pid;
protocol->func = func;
- write_lock(&protocol_list_lock);
+ write_lock_bh(&protocol_list_lock);
protocol->next = protocol_list;
protocol_list = protocol;
- write_unlock(&protocol_list_lock);
+ write_unlock_bh(&protocol_list_lock);
return 1;
}
@@ -81,16 +80,16 @@ void ax25_protocol_release(unsigned int pid)
{
struct protocol_struct *s, *protocol;
- write_lock(&protocol_list_lock);
+ write_lock_bh(&protocol_list_lock);
protocol = protocol_list;
if (protocol == NULL) {
- write_unlock(&protocol_list_lock);
+ write_unlock_bh(&protocol_list_lock);
return;
}
if (protocol->pid == pid) {
protocol_list = protocol->next;
- write_unlock(&protocol_list_lock);
+ write_unlock_bh(&protocol_list_lock);
kfree(protocol);
return;
}
@@ -99,14 +98,14 @@ void ax25_protocol_release(unsigned int pid)
if (protocol->next->pid == pid) {
s = protocol->next;
protocol->next = protocol->next->next;
- write_unlock(&protocol_list_lock);
+ write_unlock_bh(&protocol_list_lock);
kfree(s);
return;
}
protocol = protocol->next;
}
- write_unlock(&protocol_list_lock);
+ write_unlock_bh(&protocol_list_lock);
}
EXPORT_SYMBOL(ax25_protocol_release);
@@ -267,13 +266,13 @@ int ax25_protocol_is_registered(unsigned int pid)
struct protocol_struct *protocol;
int res = 0;
- read_lock(&protocol_list_lock);
+ read_lock_bh(&protocol_list_lock);
for (protocol = protocol_list; protocol != NULL; protocol = protocol->next)
if (protocol->pid == pid) {
res = 1;
break;
}
- read_unlock(&protocol_list_lock);
+ read_unlock_bh(&protocol_list_lock);
return res;
}
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4cf87540fb3a..e9d94291581e 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -102,8 +102,8 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
{
int (*func)(struct sk_buff *, ax25_cb *);
- volatile int queued = 0;
unsigned char pid;
+ int queued = 0;
if (skb == NULL) return 0;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index a0b534f80f17..136c3aefa9de 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -6,7 +6,6 @@
*
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -104,11 +103,13 @@ int ax25_rebuild_header(struct sk_buff *skb)
{
struct sk_buff *ourskb;
unsigned char *bp = skb->data;
- struct net_device *dev;
+ ax25_route *route;
+ struct net_device *dev = NULL;
ax25_address *src, *dst;
+ ax25_digi *digipeat = NULL;
ax25_dev *ax25_dev;
- ax25_route _route, *route = &_route;
ax25_cb *ax25;
+ char ip_mode = ' ';
dst = (ax25_address *)(bp + 1);
src = (ax25_address *)(bp + 8);
@@ -116,8 +117,12 @@ int ax25_rebuild_header(struct sk_buff *skb)
if (arp_find(bp + 1, skb))
return 1;
- route = ax25_rt_find_route(route, dst, NULL);
- dev = route->dev;
+ route = ax25_get_route(dst, NULL);
+ if (route) {
+ digipeat = route->digipeat;
+ dev = route->dev;
+ ip_mode = route->ip_mode;
+ };
if (dev == NULL)
dev = skb->dev;
@@ -127,7 +132,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
}
if (bp[16] == AX25_P_IP) {
- if (route->ip_mode == 'V' || (route->ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
+ if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
/*
* We copy the buffer and release the original thereby
* keeping it straight
@@ -173,7 +178,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
ourskb,
ax25_dev->values[AX25_VALUES_PACLEN],
&src_c,
- &dst_c, route->digipeat, dev);
+ &dst_c, digipeat, dev);
if (ax25) {
ax25_cb_put(ax25);
}
@@ -191,7 +196,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
skb_pull(skb, AX25_KISS_HEADER_LEN);
- if (route->digipeat != NULL) {
+ if (digipeat != NULL) {
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
kfree_skb(skb);
goto put;
@@ -203,7 +208,8 @@ int ax25_rebuild_header(struct sk_buff *skb)
ax25_queue_xmit(skb, dev);
put:
- ax25_put_route(route);
+ if (route)
+ ax25_put_route(route);
return 1;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 5d99852b239c..d7736e585336 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -8,7 +8,6 @@
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
* Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 5ac98250797b..51b7bdaf27eb 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -41,8 +41,6 @@
static ax25_route *ax25_route_list;
static DEFINE_RWLOCK(ax25_route_lock);
-static ax25_route *ax25_get_route(ax25_address *, struct net_device *);
-
void ax25_rt_device_down(struct net_device *dev)
{
ax25_route *s, *t, *ax25_rt;
@@ -115,7 +113,7 @@ static int ax25_rt_add(struct ax25_routes_struct *route)
return -ENOMEM;
}
- atomic_set(&ax25_rt->ref, 0);
+ atomic_set(&ax25_rt->refcount, 1);
ax25_rt->callsign = route->dest_addr;
ax25_rt->dev = ax25_dev->dev;
ax25_rt->digipeat = NULL;
@@ -140,23 +138,10 @@ static int ax25_rt_add(struct ax25_routes_struct *route)
return 0;
}
-static void ax25_rt_destroy(ax25_route *ax25_rt)
+void __ax25_put_route(ax25_route *ax25_rt)
{
- if (atomic_read(&ax25_rt->ref) == 0) {
- kfree(ax25_rt->digipeat);
- kfree(ax25_rt);
- return;
- }
-
- /*
- * Uh... Route is still in use; we can't yet destroy it. Retry later.
- */
- init_timer(&ax25_rt->timer);
- ax25_rt->timer.data = (unsigned long) ax25_rt;
- ax25_rt->timer.function = (void *) ax25_rt_destroy;
- ax25_rt->timer.expires = jiffies + 5 * HZ;
-
- add_timer(&ax25_rt->timer);
+ kfree(ax25_rt->digipeat);
+ kfree(ax25_rt);
}
static int ax25_rt_del(struct ax25_routes_struct *route)
@@ -177,12 +162,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
ax25cmp(&route->dest_addr, &s->callsign) == 0) {
if (ax25_route_list == s) {
ax25_route_list = s->next;
- ax25_rt_destroy(s);
+ ax25_put_route(s);
} else {
for (t = ax25_route_list; t != NULL; t = t->next) {
if (t->next == s) {
t->next = s->next;
- ax25_rt_destroy(s);
+ ax25_put_route(s);
break;
}
}
@@ -362,7 +347,7 @@ struct file_operations ax25_route_fops = {
*
* Only routes with a reference count of zero can be destroyed.
*/
-static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
+ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
{
ax25_route *ax25_spe_rt = NULL;
ax25_route *ax25_def_rt = NULL;
@@ -392,7 +377,7 @@ static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
ax25_rt = ax25_spe_rt;
if (ax25_rt != NULL)
- atomic_inc(&ax25_rt->ref);
+ ax25_hold_route(ax25_rt);
read_unlock(&ax25_route_lock);
@@ -467,24 +452,6 @@ put:
return 0;
}
-ax25_route *ax25_rt_find_route(ax25_route * route, ax25_address *addr,
- struct net_device *dev)
-{
- ax25_route *ax25_rt;
-
- if ((ax25_rt = ax25_get_route(addr, dev)))
- return ax25_rt;
-
- route->next = NULL;
- atomic_set(&route->ref, 1);
- route->callsign = *addr;
- route->dev = dev;
- route->digipeat = NULL;
- route->ip_mode = ' ';
-
- return route;
-}
-
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
ax25_address *dest, ax25_digi *digi)
{
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index ec254057f212..72594867fab6 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -12,7 +12,6 @@
* Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
* Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index bdb64c36df12..867d42537979 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -6,7 +6,6 @@
*
* Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/spinlock.h>
@@ -204,13 +203,11 @@ void ax25_register_sysctl(void)
for (ax25_table_size = sizeof(ctl_table), ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
ax25_table_size += sizeof(ctl_table);
- if ((ax25_table = kmalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
+ if ((ax25_table = kzalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
spin_unlock_bh(&ax25_dev_lock);
return;
}
- memset(ax25_table, 0x00, ax25_table_size);
-
for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) {
ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC);
if (!child) {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 469eda0f0dfd..305a099b7477 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -24,7 +24,6 @@
/* Bluetooth address family and sockets. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -49,7 +48,7 @@
#define BT_DBG(D...)
#endif
-#define VERSION "2.8"
+#define VERSION "2.10"
/* Bluetooth sockets */
#define BT_MAX_PROTO 8
@@ -277,7 +276,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
set_current_state(TASK_INTERRUPTIBLE);
if (!timeo) {
- err = -EAGAIN;
+ err = -EINPROGRESS;
break;
}
@@ -308,13 +307,21 @@ static struct net_proto_family bt_sock_family_ops = {
static int __init bt_init(void)
{
+ int err;
+
BT_INFO("Core ver %s", VERSION);
- sock_register(&bt_sock_family_ops);
+ err = bt_sysfs_init();
+ if (err < 0)
+ return err;
- BT_INFO("HCI device and connection manager initialized");
+ err = sock_register(&bt_sock_family_ops);
+ if (err < 0) {
+ bt_sysfs_cleanup();
+ return err;
+ }
- bt_sysfs_init();
+ BT_INFO("HCI device and connection manager initialized");
hci_sock_init();
@@ -325,9 +332,9 @@ static void __exit bt_exit(void)
{
hci_sock_cleanup();
- bt_sysfs_cleanup();
-
sock_unregister(PF_BLUETOOTH);
+
+ bt_sysfs_cleanup();
}
subsys_initcall(bt_init);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index d908d49dc9f8..2312d050eeed 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -29,7 +29,6 @@
* $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -52,6 +51,7 @@
#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
#include "bnep.h"
@@ -516,6 +516,26 @@ static int bnep_session(void *arg)
return 0;
}
+static struct device *bnep_get_device(struct bnep_session *session)
+{
+ bdaddr_t *src = &bt_sk(session->sock->sk)->src;
+ bdaddr_t *dst = &bt_sk(session->sock->sk)->dst;
+ struct hci_dev *hdev;
+ struct hci_conn *conn;
+
+ hdev = hci_get_route(dst, src);
+ if (!hdev)
+ return NULL;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
+ if (!conn)
+ return NULL;
+
+ hci_dev_put(hdev);
+
+ return &conn->dev;
+}
+
int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
{
struct net_device *dev;
@@ -535,7 +555,6 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
if (!dev)
return -ENOMEM;
-
down_write(&bnep_session_sem);
ss = __bnep_get_session(dst);
@@ -552,7 +571,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
memcpy(s->eh.h_source, &dst, ETH_ALEN);
memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN);
- s->dev = dev;
+ s->dev = dev;
s->sock = sock;
s->role = req->role;
s->state = BT_CONNECTED;
@@ -569,6 +588,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
bnep_set_default_proto_filter(s);
#endif
+ SET_NETDEV_DEV(dev, bnep_get_device(s));
+
err = register_netdev(dev);
if (err) {
goto failed;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 921204f95f4a..7f7b27db6a8f 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -29,7 +29,6 @@
* $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/socket.h>
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 2bfe796cf05d..28c55835422a 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -28,7 +28,6 @@
* $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index b2e7e38531c6..be04e9fb11f6 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -20,7 +20,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -76,15 +75,13 @@
static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl)
{
- struct cmtp_application *app = kmalloc(sizeof(*app), GFP_KERNEL);
+ struct cmtp_application *app = kzalloc(sizeof(*app), GFP_KERNEL);
BT_DBG("session %p application %p appl %d", session, app, appl);
if (!app)
return NULL;
- memset(app, 0, sizeof(*app));
-
app->state = BT_OPEN;
app->appl = appl;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 901eff7ebe74..b81a01c64aea 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -20,7 +20,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -336,10 +335,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
baswap(&src, &bt_sk(sock->sk)->src);
baswap(&dst, &bt_sk(sock->sk)->dst);
- session = kmalloc(sizeof(struct cmtp_session), GFP_KERNEL);
+ session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
if (!session)
return -ENOMEM;
- memset(session, 0, sizeof(struct cmtp_session));
down_write(&cmtp_session_sem);
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 8f8fad23f78a..10ad7fd91d83 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -20,7 +20,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f812ed129e58..90e3a285a17e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -24,7 +24,6 @@
/* Bluetooth HCI connection handling. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -85,6 +84,20 @@ static void hci_acl_connect(struct hci_conn *conn)
hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp);
}
+static void hci_acl_connect_cancel(struct hci_conn *conn)
+{
+ struct hci_cp_create_conn_cancel cp;
+
+ BT_DBG("%p", conn);
+
+ if (conn->hdev->hci_ver < 2)
+ return;
+
+ bacpy(&cp.bdaddr, &conn->dst);
+ hci_send_cmd(conn->hdev, OGF_LINK_CTL,
+ OCF_CREATE_CONN_CANCEL, sizeof(cp), &cp);
+}
+
void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
{
struct hci_cp_disconnect cp;
@@ -95,7 +108,8 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
cp.handle = __cpu_to_le16(conn->handle);
cp.reason = reason;
- hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_DISCONNECT, sizeof(cp), &cp);
+ hci_send_cmd(conn->hdev, OGF_LINK_CTL,
+ OCF_DISCONNECT, sizeof(cp), &cp);
}
void hci_add_sco(struct hci_conn *conn, __u16 handle)
@@ -116,8 +130,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
static void hci_conn_timeout(unsigned long arg)
{
- struct hci_conn *conn = (void *)arg;
- struct hci_dev *hdev = conn->hdev;
+ struct hci_conn *conn = (void *) arg;
+ struct hci_dev *hdev = conn->hdev;
BT_DBG("conn %p state %d", conn, conn->state);
@@ -125,19 +139,29 @@ static void hci_conn_timeout(unsigned long arg)
return;
hci_dev_lock(hdev);
- if (conn->state == BT_CONNECTED)
+
+ switch (conn->state) {
+ case BT_CONNECT:
+ hci_acl_connect_cancel(conn);
+ break;
+ case BT_CONNECTED:
hci_acl_disconn(conn, 0x13);
- else
+ break;
+ default:
conn->state = BT_CLOSED;
+ break;
+ }
+
hci_dev_unlock(hdev);
- return;
}
-static void hci_conn_init_timer(struct hci_conn *conn)
+static void hci_conn_idle(unsigned long arg)
{
- init_timer(&conn->timer);
- conn->timer.function = hci_conn_timeout;
- conn->timer.data = (unsigned long)conn;
+ struct hci_conn *conn = (void *) arg;
+
+ BT_DBG("conn %p mode %d", conn, conn->mode);
+
+ hci_conn_enter_sniff_mode(conn);
}
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
@@ -146,17 +170,27 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
BT_DBG("%s dst %s", hdev->name, batostr(dst));
- if (!(conn = kmalloc(sizeof(struct hci_conn), GFP_ATOMIC)))
+ conn = kzalloc(sizeof(struct hci_conn), GFP_ATOMIC);
+ if (!conn)
return NULL;
- memset(conn, 0, sizeof(struct hci_conn));
bacpy(&conn->dst, dst);
- conn->type = type;
conn->hdev = hdev;
+ conn->type = type;
+ conn->mode = HCI_CM_ACTIVE;
conn->state = BT_OPEN;
+ conn->power_save = 1;
+
skb_queue_head_init(&conn->data_q);
- hci_conn_init_timer(conn);
+
+ init_timer(&conn->disc_timer);
+ conn->disc_timer.function = hci_conn_timeout;
+ conn->disc_timer.data = (unsigned long) conn;
+
+ init_timer(&conn->idle_timer);
+ conn->idle_timer.function = hci_conn_idle;
+ conn->idle_timer.data = (unsigned long) conn;
atomic_set(&conn->refcnt, 0);
@@ -168,6 +202,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
+ hci_conn_add_sysfs(conn);
+
tasklet_enable(&hdev->tx_task);
return conn;
@@ -179,7 +215,9 @@ int hci_conn_del(struct hci_conn *conn)
BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle);
- hci_conn_del_timer(conn);
+ del_timer(&conn->idle_timer);
+
+ del_timer(&conn->disc_timer);
if (conn->type == SCO_LINK) {
struct hci_conn *acl = conn->link;
@@ -198,6 +236,8 @@ int hci_conn_del(struct hci_conn *conn)
tasklet_disable(&hdev->tx_task);
+ hci_conn_del_sysfs(conn);
+
hci_conn_hash_del(hdev, conn);
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
@@ -208,7 +248,9 @@ int hci_conn_del(struct hci_conn *conn)
hci_dev_put(hdev);
- kfree(conn);
+ /* will free via device release */
+ put_device(&conn->dev);
+
return 0;
}
@@ -365,6 +407,70 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
}
EXPORT_SYMBOL(hci_conn_switch_role);
+/* Enter active mode */
+void hci_conn_enter_active_mode(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("conn %p mode %d", conn, conn->mode);
+
+ if (test_bit(HCI_RAW, &hdev->flags))
+ return;
+
+ if (conn->mode != HCI_CM_SNIFF || !conn->power_save)
+ goto timer;
+
+ if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+ struct hci_cp_exit_sniff_mode cp;
+ cp.handle = __cpu_to_le16(conn->handle);
+ hci_send_cmd(hdev, OGF_LINK_POLICY,
+ OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
+ }
+
+timer:
+ if (hdev->idle_timeout > 0)
+ mod_timer(&conn->idle_timer,
+ jiffies + msecs_to_jiffies(hdev->idle_timeout));
+}
+
+/* Enter sniff mode */
+void hci_conn_enter_sniff_mode(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("conn %p mode %d", conn, conn->mode);
+
+ if (test_bit(HCI_RAW, &hdev->flags))
+ return;
+
+ if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn))
+ return;
+
+ if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF))
+ return;
+
+ if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
+ struct hci_cp_sniff_subrate cp;
+ cp.handle = __cpu_to_le16(conn->handle);
+ cp.max_latency = __constant_cpu_to_le16(0);
+ cp.min_remote_timeout = __constant_cpu_to_le16(0);
+ cp.min_local_timeout = __constant_cpu_to_le16(0);
+ hci_send_cmd(hdev, OGF_LINK_POLICY,
+ OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
+ }
+
+ if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+ struct hci_cp_sniff_mode cp;
+ cp.handle = __cpu_to_le16(conn->handle);
+ cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
+ cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
+ cp.attempt = __constant_cpu_to_le16(4);
+ cp.timeout = __constant_cpu_to_le16(1);
+ hci_send_cmd(hdev, OGF_LINK_POLICY,
+ OCF_SNIFF_MODE, sizeof(cp), &cp);
+ }
+}
+
/* Drop all connection on the device */
void hci_conn_hash_flush(struct hci_dev *hdev)
{
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a49a6975092d..338ae977a31b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -24,7 +24,6 @@
/* Bluetooth HCI core. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
@@ -207,6 +206,9 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
/* Read Local Supported Features */
hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL);
+ /* Read Local Version */
+ hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION, 0, NULL);
+
/* Read Buffer Size (ACL mtu, max pkt, etc.) */
hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL);
@@ -337,9 +339,8 @@ void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data)
if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) {
/* Entry not in the cache. Add new one. */
- if (!(e = kmalloc(sizeof(struct inquiry_entry), GFP_ATOMIC)))
+ if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC)))
return;
- memset(e, 0, sizeof(struct inquiry_entry));
e->next = cache->list;
cache->list = e;
}
@@ -412,7 +413,7 @@ int hci_inquiry(void __user *arg)
}
hci_dev_unlock_bh(hdev);
- timeo = ir.length * 2 * HZ;
+ timeo = ir.length * msecs_to_jiffies(2000);
if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0)
goto done;
@@ -480,7 +481,8 @@ int hci_dev_open(__u16 dev)
set_bit(HCI_INIT, &hdev->flags);
//__hci_request(hdev, hci_reset_req, 0, HZ);
- ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_request(hdev, hci_init_req, 0,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -547,7 +549,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
atomic_set(&hdev->cmd_cnt, 1);
if (!test_bit(HCI_RAW, &hdev->flags)) {
set_bit(HCI_INIT, &hdev->flags);
- __hci_request(hdev, hci_reset_req, 0, HZ/4);
+ __hci_request(hdev, hci_reset_req, 0,
+ msecs_to_jiffies(250));
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -620,7 +623,8 @@ int hci_dev_reset(__u16 dev)
hdev->acl_cnt = 0; hdev->sco_cnt = 0;
if (!test_bit(HCI_RAW, &hdev->flags))
- ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_request(hdev, hci_reset_req, 0,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
done:
tasklet_enable(&hdev->tx_task);
@@ -658,7 +662,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_request(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+ err = hci_request(hdev, hci_auth_req, dr.dev_opt,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
break;
case HCISETENCRYPT:
@@ -669,18 +674,19 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_request(hdev, hci_auth_req,
- dr.dev_opt, HCI_INIT_TIMEOUT);
+ err = hci_request(hdev, hci_auth_req, dr.dev_opt,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
if (err)
break;
}
- err = hci_request(hdev, hci_encrypt_req,
- dr.dev_opt, HCI_INIT_TIMEOUT);
+ err = hci_request(hdev, hci_encrypt_req, dr.dev_opt,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
break;
case HCISETSCAN:
- err = hci_request(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+ err = hci_request(hdev, hci_scan_req, dr.dev_opt,
+ msecs_to_jiffies(HCI_INIT_TIMEOUT));
break;
case HCISETPTYPE:
@@ -796,12 +802,10 @@ struct hci_dev *hci_alloc_dev(void)
{
struct hci_dev *hdev;
- hdev = kmalloc(sizeof(struct hci_dev), GFP_KERNEL);
+ hdev = kzalloc(sizeof(struct hci_dev), GFP_KERNEL);
if (!hdev)
return NULL;
- memset(hdev, 0, sizeof(struct hci_dev));
-
skb_queue_head_init(&hdev->driver_init);
return hdev;
@@ -813,8 +817,8 @@ void hci_free_dev(struct hci_dev *hdev)
{
skb_queue_purge(&hdev->driver_init);
- /* will free via class release */
- class_device_put(&hdev->class_dev);
+ /* will free via device release */
+ put_device(&hdev->dev);
}
EXPORT_SYMBOL(hci_free_dev);
@@ -849,6 +853,10 @@ int hci_register_dev(struct hci_dev *hdev)
hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
hdev->link_mode = (HCI_LM_ACCEPT);
+ hdev->idle_timeout = 0;
+ hdev->sniff_max_interval = 800;
+ hdev->sniff_min_interval = 80;
+
tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev);
tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev);
tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev);
@@ -1221,6 +1229,9 @@ static inline void hci_sched_acl(struct hci_dev *hdev)
while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
BT_DBG("skb %p len %d", skb, skb->len);
+
+ hci_conn_enter_active_mode(conn);
+
hci_send_frame(skb);
hdev->acl_last_tx = jiffies;
@@ -1299,6 +1310,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
if (conn) {
register struct hci_proto *hp;
+ hci_conn_enter_active_mode(conn);
+
/* Send to upper protocol */
if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) {
hp->recv_acldata(conn, skb, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index eb64555d1fb3..d43d0c890975 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -24,7 +24,6 @@
/* Bluetooth HCI event handling. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -63,6 +62,7 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb
switch (ocf) {
case OCF_INQUIRY_CANCEL:
+ case OCF_EXIT_PERIODIC_INQ:
status = *((__u8 *) skb->data);
if (status) {
@@ -84,6 +84,8 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *
{
struct hci_conn *conn;
struct hci_rp_role_discovery *rd;
+ struct hci_rp_write_link_policy *lp;
+ void *sent;
BT_DBG("%s ocf 0x%x", hdev->name, ocf);
@@ -107,6 +109,27 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *
hci_dev_unlock(hdev);
break;
+ case OCF_WRITE_LINK_POLICY:
+ sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY);
+ if (!sent)
+ break;
+
+ lp = (struct hci_rp_write_link_policy *) skb->data;
+
+ if (lp->status)
+ break;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle));
+ if (conn) {
+ __le16 policy = get_unaligned((__le16 *) (sent + 2));
+ conn->link_policy = __le16_to_cpu(policy);
+ }
+
+ hci_dev_unlock(hdev);
+ break;
+
default:
BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x",
hdev->name, ocf);
@@ -275,15 +298,33 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb
/* Command Complete OGF INFO_PARAM */
static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
{
- struct hci_rp_read_loc_features *lf;
+ struct hci_rp_read_loc_version *lv;
+ struct hci_rp_read_local_features *lf;
struct hci_rp_read_buffer_size *bs;
struct hci_rp_read_bd_addr *ba;
BT_DBG("%s ocf 0x%x", hdev->name, ocf);
switch (ocf) {
+ case OCF_READ_LOCAL_VERSION:
+ lv = (struct hci_rp_read_loc_version *) skb->data;
+
+ if (lv->status) {
+ BT_DBG("%s READ_LOCAL_VERSION failed %d", hdev->name, lf->status);
+ break;
+ }
+
+ hdev->hci_ver = lv->hci_ver;
+ hdev->hci_rev = btohs(lv->hci_rev);
+ hdev->manufacturer = btohs(lv->manufacturer);
+
+ BT_DBG("%s: manufacturer %d hci_ver %d hci_rev %d", hdev->name,
+ hdev->manufacturer, hdev->hci_ver, hdev->hci_rev);
+
+ break;
+
case OCF_READ_LOCAL_FEATURES:
- lf = (struct hci_rp_read_loc_features *) skb->data;
+ lf = (struct hci_rp_read_local_features *) skb->data;
if (lf->status) {
BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status);
@@ -306,7 +347,8 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s
if (hdev->features[1] & LMP_HV3)
hdev->pkt_type |= (HCI_HV3);
- BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]);
+ BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name,
+ lf->features[0], lf->features[1], lf->features[2]);
break;
@@ -320,9 +362,17 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s
}
hdev->acl_mtu = __le16_to_cpu(bs->acl_mtu);
- hdev->sco_mtu = bs->sco_mtu ? bs->sco_mtu : 64;
- hdev->acl_pkts = hdev->acl_cnt = __le16_to_cpu(bs->acl_max_pkt);
- hdev->sco_pkts = hdev->sco_cnt = __le16_to_cpu(bs->sco_max_pkt);
+ hdev->sco_mtu = bs->sco_mtu;
+ hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt);
+ hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt);
+
+ if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) {
+ hdev->sco_mtu = 64;
+ hdev->sco_pkts = 8;
+ }
+
+ hdev->acl_cnt = hdev->acl_pkts;
+ hdev->sco_cnt = hdev->sco_pkts;
BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name,
hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts);
@@ -440,8 +490,46 @@ static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status)
BT_DBG("%s ocf 0x%x", hdev->name, ocf);
switch (ocf) {
+ case OCF_SNIFF_MODE:
+ if (status) {
+ struct hci_conn *conn;
+ struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE);
+
+ if (!cp)
+ break;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+ if (conn) {
+ clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
+ }
+
+ hci_dev_unlock(hdev);
+ }
+ break;
+
+ case OCF_EXIT_SNIFF_MODE:
+ if (status) {
+ struct hci_conn *conn;
+ struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE);
+
+ if (!cp)
+ break;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+ if (conn) {
+ clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
+ }
+
+ hci_dev_unlock(hdev);
+ }
+ break;
+
default:
- BT_DBG("%s Command status: ogf HOST_POLICY ocf %x", hdev->name, ocf);
+ BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf);
break;
}
}
@@ -623,14 +711,16 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
else
cp.role = 0x01; /* Remain slave */
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp);
+ hci_send_cmd(hdev, OGF_LINK_CTL,
+ OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp);
} else {
/* Connection rejected */
struct hci_cp_reject_conn_req cp;
bacpy(&cp.bdaddr, &ev->bdaddr);
cp.reason = 0x0f;
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_REJECT_CONN_REQ, sizeof(cp), &cp);
+ hci_send_cmd(hdev, OGF_LINK_CTL,
+ OCF_REJECT_CONN_REQ, sizeof(cp), &cp);
}
}
@@ -638,7 +728,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data;
- struct hci_conn *conn = NULL;
+ struct hci_conn *conn;
BT_DBG("%s", hdev->name);
@@ -660,12 +750,21 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (test_bit(HCI_ENCRYPT, &hdev->flags))
conn->link_mode |= HCI_LM_ENCRYPT;
+ /* Get remote features */
+ if (conn->type == ACL_LINK) {
+ struct hci_cp_read_remote_features cp;
+ cp.handle = ev->handle;
+ hci_send_cmd(hdev, OGF_LINK_CTL,
+ OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp);
+ }
+
/* Set link policy */
if (conn->type == ACL_LINK && hdev->link_policy) {
struct hci_cp_write_link_policy cp;
cp.handle = ev->handle;
cp.policy = __cpu_to_le16(hdev->link_policy);
- hci_send_cmd(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
+ hci_send_cmd(hdev, OGF_LINK_POLICY,
+ OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
}
/* Set packet type for incoming connection */
@@ -676,7 +775,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
__cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
__cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
+ hci_send_cmd(hdev, OGF_LINK_CTL,
+ OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
+ } else {
+ /* Update disconnect timer */
+ hci_conn_hold(conn);
+ hci_conn_put(conn);
}
} else
conn->state = BT_CLOSED;
@@ -704,8 +808,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data;
- struct hci_conn *conn = NULL;
- __u16 handle = __le16_to_cpu(ev->handle);
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
@@ -714,7 +817,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_handle(hdev, handle);
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn) {
conn->state = BT_CLOSED;
hci_proto_disconn_ind(conn, ev->reason);
@@ -771,7 +874,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data;
- struct hci_conn *conn = NULL;
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
@@ -794,18 +897,43 @@ static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb
hci_dev_unlock(hdev);
}
+/* Mode Change */
+static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status %d", hdev->name, ev->status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+ if (conn) {
+ conn->mode = ev->mode;
+ conn->interval = __le16_to_cpu(ev->interval);
+
+ if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+ if (conn->mode == HCI_CM_ACTIVE)
+ conn->power_save = 1;
+ else
+ conn->power_save = 0;
+ }
+ }
+
+ hci_dev_unlock(hdev);
+}
+
/* Authentication Complete */
static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data;
- struct hci_conn *conn = NULL;
- __u16 handle = __le16_to_cpu(ev->handle);
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_handle(hdev, handle);
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn) {
if (!ev->status)
conn->link_mode |= HCI_LM_AUTH;
@@ -820,8 +948,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
cp.handle = __cpu_to_le16(conn->handle);
cp.encrypt = 1;
hci_send_cmd(conn->hdev, OGF_LINK_CTL,
- OCF_SET_CONN_ENCRYPT,
- sizeof(cp), &cp);
+ OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
} else {
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
hci_encrypt_cfm(conn, ev->status, 0x00);
@@ -836,14 +963,13 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data;
- struct hci_conn *conn = NULL;
- __u16 handle = __le16_to_cpu(ev->handle);
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_handle(hdev, handle);
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn) {
if (!ev->status) {
if (ev->encrypt)
@@ -864,14 +990,13 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *
static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data;
- struct hci_conn *conn = NULL;
- __u16 handle = __le16_to_cpu(ev->handle);
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_handle(hdev, handle);
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn) {
if (!ev->status)
conn->link_mode |= HCI_LM_SECURE;
@@ -899,18 +1024,35 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff
{
}
+/* Remote Features */
+static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status %d", hdev->name, ev->status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+ if (conn && !ev->status) {
+ memcpy(conn->features, ev->features, sizeof(conn->features));
+ }
+
+ hci_dev_unlock(hdev);
+}
+
/* Clock Offset */
static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data;
- struct hci_conn *conn = NULL;
- __u16 handle = __le16_to_cpu(ev->handle);
+ struct hci_conn *conn;
BT_DBG("%s status %d", hdev->name, ev->status);
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_handle(hdev, handle);
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn && !ev->status) {
struct inquiry_entry *ie;
@@ -941,6 +1083,23 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *
hci_dev_unlock(hdev);
}
+/* Sniff Subrate */
+static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status %d", hdev->name, ev->status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+ if (conn) {
+ }
+
+ hci_dev_unlock(hdev);
+}
+
void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data;
@@ -989,6 +1148,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_role_change_evt(hdev, skb);
break;
+ case HCI_EV_MODE_CHANGE:
+ hci_mode_change_evt(hdev, skb);
+ break;
+
case HCI_EV_AUTH_COMPLETE:
hci_auth_complete_evt(hdev, skb);
break;
@@ -1013,6 +1176,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_link_key_notify_evt(hdev, skb);
break;
+ case HCI_EV_REMOTE_FEATURES:
+ hci_remote_features_evt(hdev, skb);
+ break;
+
case HCI_EV_CLOCK_OFFSET:
hci_clock_offset_evt(hdev, skb);
break;
@@ -1021,6 +1188,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_pscan_rep_mode_evt(hdev, skb);
break;
+ case HCI_EV_SNIFF_SUBRATE:
+ hci_sniff_subrate_evt(hdev, skb);
+ break;
+
case HCI_EV_CMD_STATUS:
cs = (struct hci_ev_cmd_status *) skb->data;
skb_pull(skb, sizeof(cs));
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 97bdec73d17e..1a35d343e08a 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -24,7 +24,6 @@
/* Bluetooth HCI sockets. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 0ed38740388c..989b22d9042e 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -1,9 +1,10 @@
/* Bluetooth HCI driver model support. */
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/platform_device.h>
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -12,35 +13,63 @@
#define BT_DBG(D...)
#endif
-static ssize_t show_name(struct class_device *cdev, char *buf)
+static inline char *typetostr(int type)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
- return sprintf(buf, "%s\n", hdev->name);
+ switch (type) {
+ case HCI_VIRTUAL:
+ return "VIRTUAL";
+ case HCI_USB:
+ return "USB";
+ case HCI_PCCARD:
+ return "PCCARD";
+ case HCI_UART:
+ return "UART";
+ case HCI_RS232:
+ return "RS232";
+ case HCI_PCI:
+ return "PCI";
+ case HCI_SDIO:
+ return "SDIO";
+ default:
+ return "UNKNOWN";
+ }
}
-static ssize_t show_type(struct class_device *cdev, char *buf)
+static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
- return sprintf(buf, "%d\n", hdev->type);
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%s\n", typetostr(hdev->type));
}
-static ssize_t show_address(struct class_device *cdev, char *buf)
+static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
+ struct hci_dev *hdev = dev_get_drvdata(dev);
bdaddr_t bdaddr;
baswap(&bdaddr, &hdev->bdaddr);
return sprintf(buf, "%s\n", batostr(&bdaddr));
}
-static ssize_t show_flags(struct class_device *cdev, char *buf)
+static ssize_t show_manufacturer(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->manufacturer);
+}
+
+static ssize_t show_hci_version(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->hci_ver);
+}
+
+static ssize_t show_hci_revision(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
- return sprintf(buf, "0x%lx\n", hdev->flags);
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->hci_rev);
}
-static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
+static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
+ struct hci_dev *hdev = dev_get_drvdata(dev);
struct inquiry_cache *cache = &hdev->inq_cache;
struct inquiry_entry *e;
int n = 0;
@@ -62,94 +91,268 @@ static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
return n;
}
-static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
-static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
-static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
+static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->idle_timeout);
+}
-static struct class_device_attribute *bt_attrs[] = {
- &class_device_attr_name,
- &class_device_attr_type,
- &class_device_attr_address,
- &class_device_attr_flags,
- &class_device_attr_inquiry_cache,
- NULL
-};
+static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ char *ptr;
+ __u32 val;
+
+ val = simple_strtoul(buf, &ptr, 10);
+ if (ptr == buf)
+ return -EINVAL;
+
+ if (val != 0 && (val < 500 || val > 3600000))
+ return -EINVAL;
+
+ hdev->idle_timeout = val;
+
+ return count;
+}
-#ifdef CONFIG_HOTPLUG
-static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
- int n, i = 0;
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->sniff_max_interval);
+}
- envp[i++] = buf;
- n = snprintf(buf, size, "INTERFACE=%s", hdev->name) + 1;
- buf += n;
- size -= n;
+static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ char *ptr;
+ __u16 val;
- if ((size <= 0) || (i >= num_envp))
- return -ENOMEM;
+ val = simple_strtoul(buf, &ptr, 10);
+ if (ptr == buf)
+ return -EINVAL;
- envp[i] = NULL;
- return 0;
+ if (val < 0x0002 || val > 0xFFFE || val % 2)
+ return -EINVAL;
+
+ if (val < hdev->sniff_min_interval)
+ return -EINVAL;
+
+ hdev->sniff_max_interval = val;
+
+ return count;
+}
+
+static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", hdev->sniff_min_interval);
}
-#endif
-static void bt_release(struct class_device *cdev)
+static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
- struct hci_dev *hdev = class_get_devdata(cdev);
+ struct hci_dev *hdev = dev_get_drvdata(dev);
+ char *ptr;
+ __u16 val;
+
+ val = simple_strtoul(buf, &ptr, 10);
+ if (ptr == buf)
+ return -EINVAL;
- kfree(hdev);
+ if (val < 0x0002 || val > 0xFFFE || val % 2)
+ return -EINVAL;
+
+ if (val > hdev->sniff_max_interval)
+ return -EINVAL;
+
+ hdev->sniff_min_interval = val;
+
+ return count;
}
-struct class bt_class = {
- .name = "bluetooth",
- .release = bt_release,
-#ifdef CONFIG_HOTPLUG
- .uevent = bt_uevent,
-#endif
+static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
+static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL);
+static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL);
+static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL);
+static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
+
+static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR,
+ show_idle_timeout, store_idle_timeout);
+static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR,
+ show_sniff_max_interval, store_sniff_max_interval);
+static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR,
+ show_sniff_min_interval, store_sniff_min_interval);
+
+static struct device_attribute *bt_attrs[] = {
+ &dev_attr_type,
+ &dev_attr_address,
+ &dev_attr_manufacturer,
+ &dev_attr_hci_version,
+ &dev_attr_hci_revision,
+ &dev_attr_inquiry_cache,
+ &dev_attr_idle_timeout,
+ &dev_attr_sniff_max_interval,
+ &dev_attr_sniff_min_interval,
+ NULL
};
+static ssize_t show_conn_type(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_conn *conn = dev_get_drvdata(dev);
+ return sprintf(buf, "%s\n", conn->type == ACL_LINK ? "ACL" : "SCO");
+}
+
+static ssize_t show_conn_address(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hci_conn *conn = dev_get_drvdata(dev);
+ bdaddr_t bdaddr;
+ baswap(&bdaddr, &conn->dst);
+ return sprintf(buf, "%s\n", batostr(&bdaddr));
+}
+
+#define CONN_ATTR(_name,_mode,_show,_store) \
+struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL);
+static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL);
+
+static struct device_attribute *conn_attrs[] = {
+ &conn_attr_type,
+ &conn_attr_address,
+ NULL
+};
+
+struct class *bt_class = NULL;
EXPORT_SYMBOL_GPL(bt_class);
+static struct bus_type bt_bus = {
+ .name = "bluetooth",
+};
+
+static struct platform_device *bt_platform;
+
+static void bt_release(struct device *dev)
+{
+ void *data = dev_get_drvdata(dev);
+ kfree(data);
+}
+
+static void add_conn(void *data)
+{
+ struct hci_conn *conn = data;
+ int i;
+
+ device_register(&conn->dev);
+
+ for (i = 0; conn_attrs[i]; i++)
+ device_create_file(&conn->dev, conn_attrs[i]);
+}
+
+void hci_conn_add_sysfs(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+ bdaddr_t *ba = &conn->dst;
+
+ BT_DBG("conn %p", conn);
+
+ conn->dev.parent = &hdev->dev;
+ conn->dev.release = bt_release;
+
+ snprintf(conn->dev.bus_id, BUS_ID_SIZE,
+ "%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X",
+ conn->type == ACL_LINK ? "acl" : "sco",
+ ba->b[5], ba->b[4], ba->b[3],
+ ba->b[2], ba->b[1], ba->b[0]);
+
+ dev_set_drvdata(&conn->dev, conn);
+
+ INIT_WORK(&conn->work, add_conn, (void *) conn);
+
+ schedule_work(&conn->work);
+}
+
+static void del_conn(void *data)
+{
+ struct hci_conn *conn = data;
+ device_del(&conn->dev);
+}
+
+void hci_conn_del_sysfs(struct hci_conn *conn)
+{
+ BT_DBG("conn %p", conn);
+
+ INIT_WORK(&conn->work, del_conn, (void *) conn);
+
+ schedule_work(&conn->work);
+}
+
int hci_register_sysfs(struct hci_dev *hdev)
{
- struct class_device *cdev = &hdev->class_dev;
+ struct device *dev = &hdev->dev;
unsigned int i;
int err;
BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
- cdev->class = &bt_class;
- class_set_devdata(cdev, hdev);
+ dev->class = bt_class;
- strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE);
- err = class_device_register(cdev);
+ if (hdev->parent)
+ dev->parent = hdev->parent;
+ else
+ dev->parent = &bt_platform->dev;
+
+ strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE);
+
+ dev->release = bt_release;
+
+ dev_set_drvdata(dev, hdev);
+
+ err = device_register(dev);
if (err < 0)
return err;
for (i = 0; bt_attrs[i]; i++)
- class_device_create_file(cdev, bt_attrs[i]);
+ device_create_file(dev, bt_attrs[i]);
return 0;
}
void hci_unregister_sysfs(struct hci_dev *hdev)
{
- struct class_device * cdev = &hdev->class_dev;
-
BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
- class_device_del(cdev);
+ device_del(&hdev->dev);
}
int __init bt_sysfs_init(void)
{
- return class_register(&bt_class);
+ int err;
+
+ bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0);
+ if (IS_ERR(bt_platform))
+ return PTR_ERR(bt_platform);
+
+ err = bus_register(&bt_bus);
+ if (err < 0) {
+ platform_device_unregister(bt_platform);
+ return err;
+ }
+
+ bt_class = class_create(THIS_MODULE, "bluetooth");
+ if (IS_ERR(bt_class)) {
+ bus_unregister(&bt_bus);
+ platform_device_unregister(bt_platform);
+ return PTR_ERR(bt_class);
+ }
+
+ return 0;
}
-void __exit bt_sysfs_cleanup(void)
+void bt_sysfs_cleanup(void)
{
- class_unregister(&bt_class);
+ class_destroy(bt_class);
+
+ bus_unregister(&bt_bus);
+
+ platform_device_unregister(bt_platform);
}
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index edfea772fb67..c6abf2a5a932 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,7 +1,6 @@
config BT_HIDP
tristate "HIDP protocol support"
- depends on BT && BT_L2CAP && (BROKEN || !S390)
- select INPUT
+ depends on BT && BT_L2CAP && INPUT
help
HIDP (Human Interface Device Protocol) is a transport layer
for HID reports. HIDP is required for the Bluetooth Human
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index cdb9cfafd960..03b5dadb4951 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -20,7 +20,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -41,6 +40,7 @@
#include <linux/input.h>
#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
#include "hidp.h"
@@ -529,6 +529,26 @@ static int hidp_session(void *arg)
return 0;
}
+static struct device *hidp_get_device(struct hidp_session *session)
+{
+ bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src;
+ bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst;
+ struct hci_dev *hdev;
+ struct hci_conn *conn;
+
+ hdev = hci_get_route(dst, src);
+ if (!hdev)
+ return NULL;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
+ if (!conn)
+ return NULL;
+
+ hci_dev_put(hdev);
+
+ return &conn->dev;
+}
+
static inline void hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req)
{
struct input_dev *input = session->input;
@@ -567,6 +587,8 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co
input->relbit[0] |= BIT(REL_WHEEL);
}
+ input->cdev.dev = hidp_get_device(session);
+
input->event = hidp_input_event;
input_register_device(input);
@@ -583,10 +605,9 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst))
return -ENOTUNIQ;
- session = kmalloc(sizeof(struct hidp_session), GFP_KERNEL);
+ session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL);
if (!session)
return -ENOMEM;
- memset(session, 0, sizeof(struct hidp_session));
session->input = input_allocate_device();
if (!session->input) {
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index b8f67761b886..099646e4e2ef 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -20,7 +20,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6b4a8085357..d56f60b392ac 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -24,7 +24,6 @@
/* Bluetooth L2CAP core and sockets. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -64,11 +63,6 @@ static struct bt_sock_list l2cap_sk_list = {
.lock = RW_LOCK_UNLOCKED
};
-static int l2cap_conn_del(struct hci_conn *conn, int err);
-
-static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent);
-static void l2cap_chan_del(struct sock *sk, int err);
-
static void __l2cap_sock_close(struct sock *sk, int reason);
static void l2cap_sock_close(struct sock *sk);
static void l2cap_sock_kill(struct sock *sk);
@@ -110,24 +104,177 @@ static void l2cap_sock_init_timer(struct sock *sk)
sk->sk_timer.data = (unsigned long)sk;
}
+/* ---- L2CAP channels ---- */
+static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
+{
+ struct sock *s;
+ for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+ if (l2cap_pi(s)->dcid == cid)
+ break;
+ }
+ return s;
+}
+
+static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+ struct sock *s;
+ for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+ if (l2cap_pi(s)->scid == cid)
+ break;
+ }
+ return s;
+}
+
+/* Find channel with given SCID.
+ * Returns locked socket */
+static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+ struct sock *s;
+ read_lock(&l->lock);
+ s = __l2cap_get_chan_by_scid(l, cid);
+ if (s) bh_lock_sock(s);
+ read_unlock(&l->lock);
+ return s;
+}
+
+static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+ struct sock *s;
+ for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+ if (l2cap_pi(s)->ident == ident)
+ break;
+ }
+ return s;
+}
+
+static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+ struct sock *s;
+ read_lock(&l->lock);
+ s = __l2cap_get_chan_by_ident(l, ident);
+ if (s) bh_lock_sock(s);
+ read_unlock(&l->lock);
+ return s;
+}
+
+static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
+{
+ u16 cid = 0x0040;
+
+ for (; cid < 0xffff; cid++) {
+ if(!__l2cap_get_chan_by_scid(l, cid))
+ return cid;
+ }
+
+ return 0;
+}
+
+static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
+{
+ sock_hold(sk);
+
+ if (l->head)
+ l2cap_pi(l->head)->prev_c = sk;
+
+ l2cap_pi(sk)->next_c = l->head;
+ l2cap_pi(sk)->prev_c = NULL;
+ l->head = sk;
+}
+
+static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
+{
+ struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
+
+ write_lock_bh(&l->lock);
+ if (sk == l->head)
+ l->head = next;
+
+ if (next)
+ l2cap_pi(next)->prev_c = prev;
+ if (prev)
+ l2cap_pi(prev)->next_c = next;
+ write_unlock_bh(&l->lock);
+
+ __sock_put(sk);
+}
+
+static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
+{
+ struct l2cap_chan_list *l = &conn->chan_list;
+
+ BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
+
+ l2cap_pi(sk)->conn = conn;
+
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ /* Alloc CID for connection-oriented socket */
+ l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
+ } else if (sk->sk_type == SOCK_DGRAM) {
+ /* Connectionless socket */
+ l2cap_pi(sk)->scid = 0x0002;
+ l2cap_pi(sk)->dcid = 0x0002;
+ l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+ } else {
+ /* Raw socket can send/recv signalling messages only */
+ l2cap_pi(sk)->scid = 0x0001;
+ l2cap_pi(sk)->dcid = 0x0001;
+ l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+ }
+
+ __l2cap_chan_link(l, sk);
+
+ if (parent)
+ bt_accept_enqueue(parent, sk);
+}
+
+/* Delete channel.
+ * Must be called on the locked socket. */
+static void l2cap_chan_del(struct sock *sk, int err)
+{
+ struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+ struct sock *parent = bt_sk(sk)->parent;
+
+ l2cap_sock_clear_timer(sk);
+
+ BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+ if (conn) {
+ /* Unlink from channel list */
+ l2cap_chan_unlink(&conn->chan_list, sk);
+ l2cap_pi(sk)->conn = NULL;
+ hci_conn_put(conn->hcon);
+ }
+
+ sk->sk_state = BT_CLOSED;
+ sock_set_flag(sk, SOCK_ZAPPED);
+
+ if (err)
+ sk->sk_err = err;
+
+ if (parent) {
+ bt_accept_unlink(sk);
+ parent->sk_data_ready(parent, 0);
+ } else
+ sk->sk_state_change(sk);
+}
+
/* ---- L2CAP connections ---- */
static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
{
- struct l2cap_conn *conn;
-
- if ((conn = hcon->l2cap_data))
- return conn;
+ struct l2cap_conn *conn = hcon->l2cap_data;
- if (status)
+ if (conn || status)
return conn;
- if (!(conn = kmalloc(sizeof(struct l2cap_conn), GFP_ATOMIC)))
+ conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC);
+ if (!conn)
return NULL;
- memset(conn, 0, sizeof(struct l2cap_conn));
hcon->l2cap_data = conn;
conn->hcon = hcon;
+ BT_DBG("hcon %p conn %p", hcon, conn);
+
conn->mtu = hcon->hdev->acl_mtu;
conn->src = &hcon->hdev->bdaddr;
conn->dst = &hcon->dst;
@@ -135,17 +282,16 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
spin_lock_init(&conn->lock);
rwlock_init(&conn->chan_list.lock);
- BT_DBG("hcon %p conn %p", hcon, conn);
return conn;
}
-static int l2cap_conn_del(struct hci_conn *hcon, int err)
+static void l2cap_conn_del(struct hci_conn *hcon, int err)
{
- struct l2cap_conn *conn;
+ struct l2cap_conn *conn = hcon->l2cap_data;
struct sock *sk;
- if (!(conn = hcon->l2cap_data))
- return 0;
+ if (!conn)
+ return;
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
@@ -162,15 +308,14 @@ static int l2cap_conn_del(struct hci_conn *hcon, int err)
hcon->l2cap_data = NULL;
kfree(conn);
- return 0;
}
static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
{
struct l2cap_chan_list *l = &conn->chan_list;
- write_lock(&l->lock);
+ write_lock_bh(&l->lock);
__l2cap_chan_add(conn, sk, parent);
- write_unlock(&l->lock);
+ write_unlock_bh(&l->lock);
}
static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
@@ -183,14 +328,14 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
* 200 - 254 are used by utilities like l2ping, etc.
*/
- spin_lock(&conn->lock);
+ spin_lock_bh(&conn->lock);
if (++conn->tx_ident > 128)
conn->tx_ident = 1;
id = conn->tx_ident;
- spin_unlock(&conn->lock);
+ spin_unlock_bh(&conn->lock);
return id;
}
@@ -926,160 +1071,6 @@ static int l2cap_sock_release(struct socket *sock)
return err;
}
-/* ---- L2CAP channels ---- */
-static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
-{
- struct sock *s;
- for (s = l->head; s; s = l2cap_pi(s)->next_c) {
- if (l2cap_pi(s)->dcid == cid)
- break;
- }
- return s;
-}
-
-static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
-{
- struct sock *s;
- for (s = l->head; s; s = l2cap_pi(s)->next_c) {
- if (l2cap_pi(s)->scid == cid)
- break;
- }
- return s;
-}
-
-/* Find channel with given SCID.
- * Returns locked socket */
-static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
-{
- struct sock *s;
- read_lock(&l->lock);
- s = __l2cap_get_chan_by_scid(l, cid);
- if (s) bh_lock_sock(s);
- read_unlock(&l->lock);
- return s;
-}
-
-static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
-{
- struct sock *s;
- for (s = l->head; s; s = l2cap_pi(s)->next_c) {
- if (l2cap_pi(s)->ident == ident)
- break;
- }
- return s;
-}
-
-static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
-{
- struct sock *s;
- read_lock(&l->lock);
- s = __l2cap_get_chan_by_ident(l, ident);
- if (s) bh_lock_sock(s);
- read_unlock(&l->lock);
- return s;
-}
-
-static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
-{
- u16 cid = 0x0040;
-
- for (; cid < 0xffff; cid++) {
- if(!__l2cap_get_chan_by_scid(l, cid))
- return cid;
- }
-
- return 0;
-}
-
-static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
-{
- sock_hold(sk);
-
- if (l->head)
- l2cap_pi(l->head)->prev_c = sk;
-
- l2cap_pi(sk)->next_c = l->head;
- l2cap_pi(sk)->prev_c = NULL;
- l->head = sk;
-}
-
-static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
-{
- struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
-
- write_lock(&l->lock);
- if (sk == l->head)
- l->head = next;
-
- if (next)
- l2cap_pi(next)->prev_c = prev;
- if (prev)
- l2cap_pi(prev)->next_c = next;
- write_unlock(&l->lock);
-
- __sock_put(sk);
-}
-
-static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
-{
- struct l2cap_chan_list *l = &conn->chan_list;
-
- BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
-
- l2cap_pi(sk)->conn = conn;
-
- if (sk->sk_type == SOCK_SEQPACKET) {
- /* Alloc CID for connection-oriented socket */
- l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
- } else if (sk->sk_type == SOCK_DGRAM) {
- /* Connectionless socket */
- l2cap_pi(sk)->scid = 0x0002;
- l2cap_pi(sk)->dcid = 0x0002;
- l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
- } else {
- /* Raw socket can send/recv signalling messages only */
- l2cap_pi(sk)->scid = 0x0001;
- l2cap_pi(sk)->dcid = 0x0001;
- l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
- }
-
- __l2cap_chan_link(l, sk);
-
- if (parent)
- bt_accept_enqueue(parent, sk);
-}
-
-/* Delete channel.
- * Must be called on the locked socket. */
-static void l2cap_chan_del(struct sock *sk, int err)
-{
- struct l2cap_conn *conn = l2cap_pi(sk)->conn;
- struct sock *parent = bt_sk(sk)->parent;
-
- l2cap_sock_clear_timer(sk);
-
- BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
-
- if (conn) {
- /* Unlink from channel list */
- l2cap_chan_unlink(&conn->chan_list, sk);
- l2cap_pi(sk)->conn = NULL;
- hci_conn_put(conn->hcon);
- }
-
- sk->sk_state = BT_CLOSED;
- sock_set_flag(sk, SOCK_ZAPPED);
-
- if (err)
- sk->sk_err = err;
-
- if (parent) {
- bt_accept_unlink(sk);
- parent->sk_data_ready(parent, 0);
- } else
- sk->sk_state_change(sk);
-}
-
static void l2cap_conn_ready(struct l2cap_conn *conn)
{
struct l2cap_chan_list *l = &conn->chan_list;
@@ -1425,11 +1416,11 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
if (!sk)
goto response;
- write_lock(&list->lock);
+ write_lock_bh(&list->lock);
/* Check if we already have channel with that dcid */
if (__l2cap_get_chan_by_dcid(list, scid)) {
- write_unlock(&list->lock);
+ write_unlock_bh(&list->lock);
sock_set_flag(sk, SOCK_ZAPPED);
l2cap_sock_kill(sk);
goto response;
@@ -1467,7 +1458,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
result = status = 0;
done:
- write_unlock(&list->lock);
+ write_unlock_bh(&list->lock);
response:
bh_unlock_sock(parent);
@@ -1835,7 +1826,9 @@ drop:
kfree_skb(skb);
done:
- if (sk) bh_unlock_sock(sk);
+ if (sk)
+ bh_unlock_sock(sk);
+
return 0;
}
@@ -1926,18 +1919,18 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
+ struct l2cap_conn *conn;
+
BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
if (hcon->type != ACL_LINK)
return 0;
if (!status) {
- struct l2cap_conn *conn;
-
conn = l2cap_conn_add(hcon, status);
if (conn)
l2cap_conn_ready(conn);
- } else
+ } else
l2cap_conn_del(hcon, bt_err(status));
return 0;
@@ -1951,19 +1944,21 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason)
return 0;
l2cap_conn_del(hcon, bt_err(reason));
+
return 0;
}
static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
{
struct l2cap_chan_list *l;
- struct l2cap_conn *conn;
+ struct l2cap_conn *conn = conn = hcon->l2cap_data;
struct l2cap_conn_rsp rsp;
struct sock *sk;
int result;
- if (!(conn = hcon->l2cap_data))
+ if (!conn)
return 0;
+
l = &conn->chan_list;
BT_DBG("conn %p", conn);
@@ -2006,13 +2001,14 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
{
struct l2cap_chan_list *l;
- struct l2cap_conn *conn;
+ struct l2cap_conn *conn = hcon->l2cap_data;
struct l2cap_conn_rsp rsp;
struct sock *sk;
int result;
- if (!(conn = hcon->l2cap_data))
+ if (!conn)
return 0;
+
l = &conn->chan_list;
BT_DBG("conn %p", conn);
@@ -2220,7 +2216,7 @@ static int __init l2cap_init(void)
goto error;
}
- class_create_file(&bt_class, &class_attr_l2cap);
+ class_create_file(bt_class, &class_attr_l2cap);
BT_INFO("L2CAP ver %s", VERSION);
BT_INFO("L2CAP socket layer initialized");
@@ -2234,7 +2230,7 @@ error:
static void __exit l2cap_exit(void)
{
- class_remove_file(&bt_class, &class_attr_l2cap);
+ class_remove_file(bt_class, &class_attr_l2cap);
if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
BT_ERR("L2CAP socket unregistration failed");
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index ee6a66979913..e5fd0cb70ae9 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -24,7 +24,6 @@
/* Bluetooth kernel library. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e99010ce8bb2..468df3b953f6 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -27,7 +27,6 @@
* $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -53,8 +52,10 @@
#define BT_DBG(D...)
#endif
-#define VERSION "1.7"
+#define VERSION "1.8"
+static int disable_cfc = 0;
+static int channel_mtu = -1;
static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU;
static struct task_struct *rfcomm_thread;
@@ -273,10 +274,10 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
{
- struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio);
+ struct rfcomm_dlc *d = kzalloc(sizeof(*d), prio);
+
if (!d)
return NULL;
- memset(d, 0, sizeof(*d));
init_timer(&d->timer);
d->timer.function = rfcomm_dlc_timeout;
@@ -289,6 +290,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
rfcomm_dlc_clear_state(d);
BT_DBG("%p", d);
+
return d;
}
@@ -522,10 +524,10 @@ int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig)
/* ---- RFCOMM sessions ---- */
static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
{
- struct rfcomm_session *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct rfcomm_session *s = kzalloc(sizeof(*s), GFP_KERNEL);
+
if (!s)
return NULL;
- memset(s, 0, sizeof(*s));
BT_DBG("session %p sock %p", s, sock);
@@ -534,7 +536,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
s->sock = sock;
s->mtu = RFCOMM_DEFAULT_MTU;
- s->cfc = RFCOMM_CFC_UNKNOWN;
+ s->cfc = disable_cfc ? RFCOMM_CFC_DISABLED : RFCOMM_CFC_UNKNOWN;
/* Do not increment module usage count for listening sessions.
* Otherwise we won't be able to unload the module. */
@@ -642,7 +644,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
addr.l2_family = AF_BLUETOOTH;
addr.l2_psm = htobs(RFCOMM_PSM);
*err = sock->ops->connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
- if (*err == 0 || *err == -EAGAIN)
+ if (*err == 0 || *err == -EINPROGRESS)
return s;
rfcomm_session_del(s);
@@ -811,7 +813,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d
pn->credits = 0;
}
- pn->mtu = htobs(d->mtu);
+ if (cr && channel_mtu >= 0)
+ pn->mtu = htobs(channel_mtu);
+ else
+ pn->mtu = htobs(d->mtu);
*ptr = __fcs(buf); ptr++;
@@ -1150,6 +1155,8 @@ static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d)
static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
{
+ struct sock *sk = d->session->sock->sk;
+
BT_DBG("dlc %p", d);
rfcomm_send_ua(d->session, d->dlci);
@@ -1159,6 +1166,9 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
d->state_change(d, 0);
rfcomm_dlc_unlock(d);
+ if (d->link_mode & RFCOMM_LM_MASTER)
+ hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00);
+
rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
}
@@ -1223,17 +1233,24 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
BT_DBG("dlc %p state %ld dlci %d mtu %d fc 0x%x credits %d",
d, d->state, d->dlci, pn->mtu, pn->flow_ctrl, pn->credits);
- if (pn->flow_ctrl == 0xf0 || pn->flow_ctrl == 0xe0) {
- d->cfc = s->cfc = RFCOMM_CFC_ENABLED;
+ if ((pn->flow_ctrl == 0xf0 && s->cfc != RFCOMM_CFC_DISABLED) ||
+ pn->flow_ctrl == 0xe0) {
+ d->cfc = RFCOMM_CFC_ENABLED;
d->tx_credits = pn->credits;
} else {
- d->cfc = s->cfc = RFCOMM_CFC_DISABLED;
+ d->cfc = RFCOMM_CFC_DISABLED;
set_bit(RFCOMM_TX_THROTTLED, &d->flags);
}
+ if (s->cfc == RFCOMM_CFC_UNKNOWN)
+ s->cfc = d->cfc;
+
d->priority = pn->priority;
- d->mtu = s->mtu = btohs(pn->mtu);
+ d->mtu = btohs(pn->mtu);
+
+ if (cr && d->mtu > s->mtu)
+ d->mtu = s->mtu;
return 0;
}
@@ -1760,6 +1777,11 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
s = rfcomm_session_add(nsock, BT_OPEN);
if (s) {
rfcomm_session_hold(s);
+
+ /* We should adjust MTU on incoming sessions.
+ * L2CAP MTU minus UIH header and FCS. */
+ s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5;
+
rfcomm_schedule(RFCOMM_SCHED_RX);
} else
sock_release(nsock);
@@ -2036,7 +2058,7 @@ static int __init rfcomm_init(void)
kernel_thread(rfcomm_run, NULL, CLONE_KERNEL);
- class_create_file(&bt_class, &class_attr_rfcomm_dlc);
+ class_create_file(bt_class, &class_attr_rfcomm_dlc);
rfcomm_init_sockets();
@@ -2051,7 +2073,7 @@ static int __init rfcomm_init(void)
static void __exit rfcomm_exit(void)
{
- class_remove_file(&bt_class, &class_attr_rfcomm_dlc);
+ class_remove_file(bt_class, &class_attr_rfcomm_dlc);
hci_unregister_cb(&rfcomm_cb);
@@ -2074,6 +2096,12 @@ static void __exit rfcomm_exit(void)
module_init(rfcomm_init);
module_exit(rfcomm_exit);
+module_param(disable_cfc, bool, 0644);
+MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control");
+
+module_param(channel_mtu, int, 0644);
+MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel");
+
module_param(l2cap_mtu, uint, 0644);
MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection");
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 757d2dd3b02f..220fee04e7f2 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -27,7 +27,6 @@
* $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -945,7 +944,7 @@ int __init rfcomm_init_sockets(void)
if (err < 0)
goto error;
- class_create_file(&bt_class, &class_attr_rfcomm);
+ class_create_file(bt_class, &class_attr_rfcomm);
BT_INFO("RFCOMM socket layer initialized");
@@ -959,7 +958,7 @@ error:
void __exit rfcomm_cleanup_sockets(void)
{
- class_remove_file(&bt_class, &class_attr_rfcomm);
+ class_remove_file(bt_class, &class_attr_rfcomm);
if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
BT_ERR("RFCOMM socket layer unregistration failed");
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 74368f79ee5d..26f322737db0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -27,7 +27,6 @@
* $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/tty.h>
@@ -39,6 +38,7 @@
#include <linux/skbuff.h>
#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/rfcomm.h>
#ifndef CONFIG_BT_RFCOMM_DEBUG
@@ -162,6 +162,24 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id)
return dev;
}
+static struct device *rfcomm_get_device(struct rfcomm_dev *dev)
+{
+ struct hci_dev *hdev;
+ struct hci_conn *conn;
+
+ hdev = hci_get_route(&dev->dst, &dev->src);
+ if (!hdev)
+ return NULL;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst);
+ if (!conn)
+ return NULL;
+
+ hci_dev_put(hdev);
+
+ return &conn->dev;
+}
+
static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
{
struct rfcomm_dev *dev;
@@ -170,10 +188,9 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
BT_DBG("id %d channel %d", req->dev_id, req->channel);
- dev = kmalloc(sizeof(struct rfcomm_dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(struct rfcomm_dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
- memset(dev, 0, sizeof(struct rfcomm_dev));
write_lock_bh(&rfcomm_dev_lock);
@@ -246,7 +263,7 @@ out:
return err;
}
- tty_register_device(rfcomm_tty_driver, dev->id, NULL);
+ tty_register_device(rfcomm_tty_driver, dev->id, rfcomm_get_device(dev));
return dev->id;
}
@@ -480,12 +497,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
- if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
- tty_buffer_request_room(tty, skb->len);
- tty_insert_flip_string(tty, skb->data, skb->len);
- tty_flip_buffer_push(tty);
- } else
- tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len);
+ tty_insert_flip_string(tty, skb->data, skb->len);
+ tty_flip_buffer_push(tty);
kfree_skb(skb);
}
@@ -1025,13 +1038,12 @@ int rfcomm_init_ttys(void)
rfcomm_tty_driver->owner = THIS_MODULE;
rfcomm_tty_driver->driver_name = "rfcomm";
- rfcomm_tty_driver->devfs_name = "bluetooth/rfcomm/";
rfcomm_tty_driver->name = "rfcomm";
rfcomm_tty_driver->major = RFCOMM_TTY_MAJOR;
rfcomm_tty_driver->minor_start = RFCOMM_TTY_MINOR;
rfcomm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
- rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS;
+ rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
rfcomm_tty_driver->init_termios = tty_std_termios;
rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 0c2d13ad69bb..7714a2ec3854 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -24,7 +24,6 @@
/* Bluetooth SCO sockets. */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -109,17 +108,14 @@ static void sco_sock_init_timer(struct sock *sk)
static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
{
struct hci_dev *hdev = hcon->hdev;
- struct sco_conn *conn;
-
- if ((conn = hcon->sco_data))
- return conn;
+ struct sco_conn *conn = hcon->sco_data;
- if (status)
+ if (conn || status)
return conn;
- if (!(conn = kmalloc(sizeof(struct sco_conn), GFP_ATOMIC)))
+ conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC);
+ if (!conn)
return NULL;
- memset(conn, 0, sizeof(struct sco_conn));
spin_lock_init(&conn->lock);
@@ -135,6 +131,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
conn->mtu = 60;
BT_DBG("hcon %p conn %p", hcon, conn);
+
return conn;
}
@@ -970,7 +967,7 @@ static int __init sco_init(void)
goto error;
}
- class_create_file(&bt_class, &class_attr_sco);
+ class_create_file(bt_class, &class_attr_sco);
BT_INFO("SCO (Voice Link) ver %s", VERSION);
BT_INFO("SCO socket layer initialized");
@@ -984,7 +981,7 @@ error:
static void __exit sco_exit(void)
{
- class_remove_file(&bt_class, &class_attr_sco);
+ class_remove_file(bt_class, &class_attr_sco);
if (bt_sock_unregister(BTPROTO_SCO) < 0)
BT_ERR("SCO socket unregistration failed");
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 654401ceb2db..2994387999a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -13,7 +13,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2afdc7c0736c..f8dbcee80eba 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -184,6 +184,6 @@ void br_dev_setup(struct net_device *dev)
dev->set_mac_address = br_set_mac_address;
dev->priv_flags = IFF_EBRIDGE;
- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
- | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_NO_CSUM;
+ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 8be9f2123e54..191b861e5e53 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -35,16 +35,17 @@ static inline unsigned packet_length(const struct sk_buff *skb)
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
/* drop mtu oversized packets except gso */
- if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
+ if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
kfree_skb(skb);
else {
-#ifdef CONFIG_BRIDGE_NETFILTER
/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
- nf_bridge_maybe_copy_header(skb);
-#endif
- skb_push(skb, ETH_HLEN);
+ if (nf_bridge_maybe_copy_header(skb))
+ kfree_skb(skb);
+ else {
+ skb_push(skb, ETH_HLEN);
- dev_queue_xmit(skb);
+ dev_queue_xmit(skb);
+ }
}
return 0;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 07956ecf545e..b1211d5342f6 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -386,13 +386,19 @@ void br_features_recompute(struct net_bridge *br)
checksum = 0;
if (feature & NETIF_F_GSO)
- feature |= NETIF_F_TSO;
+ feature |= NETIF_F_GSO_SOFTWARE;
feature |= NETIF_F_GSO;
features &= feature;
}
- br->dev->features = features | checksum | NETIF_F_LLTX;
+ if (!(checksum & NETIF_F_ALL_CSUM))
+ features &= ~NETIF_F_SG;
+ if (!(features & NETIF_F_SG))
+ features &= ~NETIF_F_GSO_MASK;
+
+ br->dev->features = features | checksum | NETIF_F_LLTX |
+ NETIF_F_GSO_ROBUST;
}
/* called with RTNL */
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 159fb8409824..4e4119a12139 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -162,12 +162,10 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (num > BR_MAX_PORTS)
num = BR_MAX_PORTS;
- indices = kmalloc(num*sizeof(int), GFP_KERNEL);
+ indices = kcalloc(num, sizeof(int), GFP_KERNEL);
if (indices == NULL)
return -ENOMEM;
- memset(indices, 0, num*sizeof(int));
-
get_port_ifindices(br, indices, num);
if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
num = -EFAULT;
@@ -327,11 +325,10 @@ static int old_deviceless(void __user *uarg)
if (args[2] >= 2048)
return -ENOMEM;
- indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL);
+ indices = kcalloc(args[2], sizeof(int), GFP_KERNEL);
if (indices == NULL)
return -ENOMEM;
- memset(indices, 0, args[2]*sizeof(int));
args[2] = get_bridge_ifindices(indices, args[2]);
ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8298a5179aef..ac181be13d83 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -53,14 +53,17 @@
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
-static int brnf_call_iptables = 1;
-static int brnf_call_ip6tables = 1;
-static int brnf_call_arptables = 1;
-static int brnf_filter_vlan_tagged = 1;
+static int brnf_call_iptables __read_mostly = 1;
+static int brnf_call_ip6tables __read_mostly = 1;
+static int brnf_call_arptables __read_mostly = 1;
+static int brnf_filter_vlan_tagged __read_mostly = 1;
#else
#define brnf_filter_vlan_tagged 1
#endif
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
static __be16 inline vlan_proto(const struct sk_buff *skb)
{
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -124,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
static inline void nf_bridge_save_header(struct sk_buff *skb)
{
- int header_size = 16;
+ int header_size = ETH_HLEN;
if (skb->protocol == htons(ETH_P_8021Q))
- header_size = 18;
+ header_size += VLAN_HLEN;
memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
}
+/*
+ * When forwarding bridge frames, we save a copy of the original
+ * header before processing.
+ */
+int nf_bridge_copy_header(struct sk_buff *skb)
+{
+ int err;
+ int header_size = ETH_HLEN;
+
+ if (skb->protocol == htons(ETH_P_8021Q))
+ header_size += VLAN_HLEN;
+
+ err = skb_cow(skb, header_size);
+ if (err)
+ return err;
+
+ memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+
+ if (skb->protocol == htons(ETH_P_8021Q))
+ __skb_push(skb, VLAN_HLEN);
+ return 0;
+}
+
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
@@ -692,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
else
pf = PF_INET6;
-#ifdef CONFIG_NETFILTER_DEBUG
- /* Sometimes we get packets with NULL ->dst here (for example,
- * running a dhcp client daemon triggers this). This should now
- * be fixed, but let's keep the check around. */
- if (skb->dst == NULL) {
- printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
- return NF_ACCEPT;
- }
-#endif
-
nf_bridge = skb->nf_bridge;
nf_bridge->physoutdev = skb->dev;
realindev = nf_bridge->physindev;
@@ -761,7 +777,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP) &&
skb->len > skb->dev->mtu &&
- !skb_shinfo(skb)->gso_size)
+ !skb_is_gso(skb))
return ip_fragment(skb, br_dev_queue_push_xmit);
else
return br_dev_queue_push_xmit(skb);
@@ -783,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
* keep the check just to be sure... */
if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
- "bad mac.raw pointer.");
+ "bad mac.raw pointer.\n");
goto print_error;
}
#endif
@@ -801,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
#ifdef CONFIG_NETFILTER_DEBUG
if (skb->dst == NULL) {
- printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+ printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
goto print_error;
}
#endif
@@ -838,6 +854,7 @@ print_error:
}
printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
skb->data);
+ dump_stack();
return NF_ACCEPT;
#endif
}
@@ -890,6 +907,8 @@ static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
return NF_ACCEPT;
else if (ip->version == 6 && !brnf_call_ip6tables)
return NF_ACCEPT;
+ else if (!brnf_deferred_hooks)
+ return NF_ACCEPT;
#endif
if (hook == NF_IP_POST_ROUTING)
return NF_ACCEPT;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 881d7d1a732a..8f661195d09d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/rtnetlink.h>
+#include <net/netlink.h>
#include "br_private.h"
/*
@@ -76,26 +77,24 @@ rtattr_failure:
void br_ifinfo_notify(int event, struct net_bridge_port *port)
{
struct sk_buff *skb;
- int err = -ENOMEM;
+ int payload = sizeof(struct ifinfomsg) + 128;
+ int err = -ENOBUFS;
pr_debug("bridge notify event=%d\n", event);
- skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128),
- GFP_ATOMIC);
- if (!skb)
- goto err_out;
-
- err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0);
- if (err)
- goto err_kfree;
-
- NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
- return;
-
-err_kfree:
- kfree_skb(skb);
-err_out:
- netlink_set_err(rtnl, 0, RTNLGRP_LINK, err);
+ skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_LINK, err);
}
/*
@@ -117,12 +116,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (idx < s_idx)
- continue;
+ goto cont;
err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
if (err <= 0)
break;
+cont:
++idx;
}
read_unlock(&dev_base_lock);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index a7ba0cce0b46..068d8afbf0a7 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -121,7 +121,7 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
buf[1] = 0;
buf[2] = 0;
buf[3] = BPDU_TYPE_TCN;
- br_send_bpdu(p, buf, 7);
+ br_send_bpdu(p, buf, 4);
}
/*
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index d19fc4b328dc..0aa7b9910a86 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -20,7 +20,7 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
const void *data, unsigned int datalen)
{
struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data;
- u32 _sip, *siptr, _dip, *diptr;
+ __be32 _sip, *siptr, _dip, *diptr;
struct arphdr _ah, *ap;
unsigned char _sha[ETH_ALEN], *shp;
struct sk_buff *skb = *pskb;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index ee5a51761260..9f950db3b76f 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -29,7 +29,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
@@ -75,6 +74,9 @@ static void ulog_send(unsigned int nlgroup)
if (timer_pending(&ub->timer))
del_timer(&ub->timer);
+ if (!ub->skb)
+ return;
+
/* last nlmsg needs NLMSG_DONE */
if (ub->qlen > 1)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3a13ed643459..3df55b2bd91d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -24,6 +24,7 @@
#include <linux/vmalloc.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <asm/uaccess.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
@@ -31,36 +32,9 @@
/* needed for logical [in,out]-dev filtering */
#include "../br_private.h"
-/* list_named_find */
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/mutex.h>
-
-#if 0
-/* use this for remote debugging
- * Copyright (C) 1998 by Ori Pomerantz
- * Print the string to the appropriate tty, the one
- * the current task uses
- */
-static void print_string(char *str)
-{
- struct tty_struct *my_tty;
-
- /* The tty for the current task */
- my_tty = current->signal->tty;
- if (my_tty != NULL) {
- my_tty->driver->write(my_tty, 0, str, strlen(str));
- my_tty->driver->write(my_tty, 0, "\015\012", 2);
- }
-}
-
-#define BUGPRINT(args) print_string(args);
-#else
#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
"report to author: "format, ## args)
/* #define BUGPRINT(format, args...) */
-#endif
#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
": out of memory: "format, ## args)
/* #define MEMPRINT(format, args...) */
@@ -299,18 +273,22 @@ static inline void *
find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
struct mutex *mutex)
{
- void *ret;
+ struct {
+ struct list_head list;
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ } *e;
*error = mutex_lock_interruptible(mutex);
if (*error != 0)
return NULL;
- ret = list_named_find(head, name);
- if (!ret) {
- *error = -ENOENT;
- mutex_unlock(mutex);
+ list_for_each_entry(e, head, list) {
+ if (strcmp(e->name, name) == 0)
+ return e;
}
- return ret;
+ *error = -ENOENT;
+ mutex_unlock(mutex);
+ return NULL;
}
#ifndef CONFIG_KMOD
@@ -1064,15 +1042,19 @@ free_newinfo:
int ebt_register_target(struct ebt_target *target)
{
+ struct ebt_target *t;
int ret;
ret = mutex_lock_interruptible(&ebt_mutex);
if (ret != 0)
return ret;
- if (!list_named_insert(&ebt_targets, target)) {
- mutex_unlock(&ebt_mutex);
- return -EEXIST;
+ list_for_each_entry(t, &ebt_targets, list) {
+ if (strcmp(t->name, target->name) == 0) {
+ mutex_unlock(&ebt_mutex);
+ return -EEXIST;
+ }
}
+ list_add(&target->list, &ebt_targets);
mutex_unlock(&ebt_mutex);
return 0;
@@ -1081,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target)
void ebt_unregister_target(struct ebt_target *target)
{
mutex_lock(&ebt_mutex);
- LIST_DELETE(&ebt_targets, target);
+ list_del(&target->list);
mutex_unlock(&ebt_mutex);
}
int ebt_register_match(struct ebt_match *match)
{
+ struct ebt_match *m;
int ret;
ret = mutex_lock_interruptible(&ebt_mutex);
if (ret != 0)
return ret;
- if (!list_named_insert(&ebt_matches, match)) {
- mutex_unlock(&ebt_mutex);
- return -EEXIST;
+ list_for_each_entry(m, &ebt_matches, list) {
+ if (strcmp(m->name, match->name) == 0) {
+ mutex_unlock(&ebt_mutex);
+ return -EEXIST;
+ }
}
+ list_add(&match->list, &ebt_matches);
mutex_unlock(&ebt_mutex);
return 0;
@@ -1104,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match)
void ebt_unregister_match(struct ebt_match *match)
{
mutex_lock(&ebt_mutex);
- LIST_DELETE(&ebt_matches, match);
+ list_del(&match->list);
mutex_unlock(&ebt_mutex);
}
int ebt_register_watcher(struct ebt_watcher *watcher)
{
+ struct ebt_watcher *w;
int ret;
ret = mutex_lock_interruptible(&ebt_mutex);
if (ret != 0)
return ret;
- if (!list_named_insert(&ebt_watchers, watcher)) {
- mutex_unlock(&ebt_mutex);
- return -EEXIST;
+ list_for_each_entry(w, &ebt_watchers, list) {
+ if (strcmp(w->name, watcher->name) == 0) {
+ mutex_unlock(&ebt_mutex);
+ return -EEXIST;
+ }
}
+ list_add(&watcher->list, &ebt_watchers);
mutex_unlock(&ebt_mutex);
return 0;
@@ -1127,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher)
void ebt_unregister_watcher(struct ebt_watcher *watcher)
{
mutex_lock(&ebt_mutex);
- LIST_DELETE(&ebt_watchers, watcher);
+ list_del(&watcher->list);
mutex_unlock(&ebt_mutex);
}
int ebt_register_table(struct ebt_table *table)
{
struct ebt_table_info *newinfo;
+ struct ebt_table *t;
int ret, i, countersize;
if (!table || !table->table ||!table->table->entries ||
@@ -1179,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table)
if (ret != 0)
goto free_chainstack;
- if (list_named_find(&ebt_tables, table->name)) {
- ret = -EEXIST;
- BUGPRINT("Table name already exists\n");
- goto free_unlock;
+ list_for_each_entry(t, &ebt_tables, list) {
+ if (strcmp(t->name, table->name) == 0) {
+ ret = -EEXIST;
+ BUGPRINT("Table name already exists\n");
+ goto free_unlock;
+ }
}
/* Hold a reference count if the chains aren't empty */
@@ -1190,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table)
ret = -ENOENT;
goto free_unlock;
}
- list_prepend(&ebt_tables, table);
+ list_add(&table->list, &ebt_tables);
mutex_unlock(&ebt_mutex);
return 0;
free_unlock:
@@ -1216,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table)
return;
}
mutex_lock(&ebt_mutex);
- LIST_DELETE(&ebt_tables, table);
+ list_del(&table->list);
mutex_unlock(&ebt_mutex);
vfree(table->private->entries);
if (table->private->chainstack) {
@@ -1486,7 +1479,7 @@ static int __init ebtables_init(void)
int ret;
mutex_lock(&ebt_mutex);
- list_named_insert(&ebt_targets, &ebt_standard_target);
+ list_add(&ebt_standard_target.list, &ebt_targets);
mutex_unlock(&ebt_mutex);
if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
return ret;
diff --git a/net/core/Makefile b/net/core/Makefile
index e9bd2467d5a9..119568077dab 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_mcast.o dst.o \
+obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
@@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index aecddcc30401..f558c61aecc7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb)
sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_HW))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
goto fault;
if ((unsigned short)csum_fold(csum))
goto csum_error;
- if (unlikely(skb->ip_summed == CHECKSUM_HW))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
iov->iov_len -= chunk;
iov->iov_base += chunk;
diff --git a/net/core/dev.c b/net/core/dev.c
index ea2469398bd5..4d891beab138 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -76,7 +76,6 @@
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -117,6 +116,7 @@
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
+#include <linux/ctype.h>
/*
* The list of packet types we will receive (as opposed to discard)
@@ -230,7 +230,7 @@ extern void netdev_unregister_sysfs(struct net_device *);
* For efficiency
*/
-int netdev_nit;
+static int netdev_nit;
/*
* Add a protocol ID to the list. Now that the input handler is
@@ -633,14 +633,24 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas
* @name: name string
*
* Network device names need to be valid file names to
- * to allow sysfs to work
+ * to allow sysfs to work. We also disallow any kind of
+ * whitespace.
*/
int dev_valid_name(const char *name)
{
- return !(*name == '\0'
- || !strcmp(name, ".")
- || !strcmp(name, "..")
- || strchr(name, '/'));
+ if (*name == '\0')
+ return 0;
+ if (strlen(name) >= IFNAMSIZ)
+ return 0;
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ return 0;
+
+ while (*name) {
+ if (*name == '/' || isspace(*name))
+ return 0;
+ name++;
+ }
+ return 1;
}
/**
@@ -1158,14 +1168,17 @@ EXPORT_SYMBOL(netif_device_attach);
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
*/
-int skb_checksum_help(struct sk_buff *skb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
{
unsigned int csum;
int ret = 0, offset = skb->h.raw - skb->data;
- if (inward) {
- skb->ip_summed = CHECKSUM_NONE;
- goto out;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ goto out_set_summed;
+
+ if (unlikely(skb_shinfo(skb)->gso_size)) {
+ /* Let GSO fix up the checksum. */
+ goto out_set_summed;
}
if (skb_cloned(skb)) {
@@ -1182,6 +1195,8 @@ int skb_checksum_help(struct sk_buff *skb, int inward)
BUG_ON(skb->csum + 2 > offset);
*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
+
+out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
return ret;
@@ -1190,32 +1205,50 @@ out:
/**
* skb_gso_segment - Perform segmentation on skb.
* @skb: buffer to segment
- * @sg: whether scatter-gather is supported on the target.
+ * @features: features for the output path (see dev->features)
*
* This function segments the given skb and returns a list of segments.
+ *
+ * It may return NULL if the skb requires no segmentation. This is
+ * only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
int type = skb->protocol;
+ int err;
BUG_ON(skb_shinfo(skb)->frag_list);
- BUG_ON(skb->ip_summed != CHECKSUM_HW);
skb->mac.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->data;
__skb_pull(skb, skb->mac_len);
+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (skb_header_cloned(skb) &&
+ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ return ERR_PTR(err);
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
- segs = ptype->gso_segment(skb, sg);
+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ err = ptype->gso_send_check(skb);
+ segs = ERR_PTR(err);
+ if (err || skb_gso_ok(skb, features))
+ break;
+ __skb_push(skb, skb->data - skb->nh.raw);
+ }
+ segs = ptype->gso_segment(skb, features);
break;
}
}
rcu_read_unlock();
+ __skb_push(skb, skb->data - skb->mac.raw);
+
return segs;
}
@@ -1234,7 +1267,6 @@ void netdev_rx_csum_fault(struct net_device *dev)
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
-#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
* 1. IOMMU is present and allows to map all the memory.
* 2. No high memory really exists on this machine.
@@ -1242,6 +1274,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
+#ifdef CONFIG_HIGHMEM
int i;
if (dev->features & NETIF_F_HIGHDMA)
@@ -1251,11 +1284,9 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (PageHighMem(skb_shinfo(skb)->frags[i].page))
return 1;
+#endif
return 0;
}
-#else
-#define illegal_highdma(dev, skb) (0)
-#endif
struct dev_gso_cb {
void (*destructor)(struct sk_buff *skb);
@@ -1291,9 +1322,15 @@ static int dev_gso_segment(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct sk_buff *segs;
+ int features = dev->features & ~(illegal_highdma(dev, skb) ?
+ NETIF_F_SG : 0);
+
+ segs = skb_gso_segment(skb, features);
+
+ /* Verifying header integrity only. */
+ if (!segs)
+ return 0;
- segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
- !illegal_highdma(dev, skb));
if (unlikely(IS_ERR(segs)))
return PTR_ERR(segs);
@@ -1310,13 +1347,17 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (netdev_nit)
dev_queue_xmit_nit(skb, dev);
- if (!netif_needs_gso(dev, skb))
- return dev->hard_start_xmit(skb, dev);
+ if (netif_needs_gso(dev, skb)) {
+ if (unlikely(dev_gso_segment(skb)))
+ goto out_kfree_skb;
+ if (skb->next)
+ goto gso;
+ }
- if (unlikely(dev_gso_segment(skb)))
- goto out_kfree_skb;
+ return dev->hard_start_xmit(skb, dev);
}
+gso:
do {
struct sk_buff *nskb = skb->next;
int rc;
@@ -1325,9 +1366,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
nskb->next = NULL;
rc = dev->hard_start_xmit(nskb, dev);
if (unlikely(rc)) {
+ nskb->next = skb->next;
skb->next = nskb;
return rc;
}
+ if (unlikely(netif_queue_stopped(dev) && skb->next))
+ return NETDEV_TX_BUSY;
} while (skb->next);
skb->destructor = DEV_GSO_CB(skb)->destructor;
@@ -1402,11 +1446,11 @@ int dev_queue_xmit(struct sk_buff *skb)
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
- if (skb->ip_summed == CHECKSUM_HW &&
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
(!(dev->features & NETIF_F_GEN_CSUM) &&
(!(dev->features & NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP))))
- if (skb_checksum_help(skb, 0))
+ if (skb_checksum_help(skb))
goto out_kfree_skb;
gso:
@@ -1436,14 +1480,16 @@ gso:
if (q->enqueue) {
/* Grab device queue */
spin_lock(&dev->queue_lock);
+ q = dev->qdisc;
+ if (q->enqueue) {
+ rc = q->enqueue(skb, q);
+ qdisc_run(dev);
+ spin_unlock(&dev->queue_lock);
- rc = q->enqueue(skb, q);
-
- qdisc_run(dev);
-
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
+ }
spin_unlock(&dev->queue_lock);
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
- goto out;
}
/* The device has no queue. Common case for software devices:
@@ -1586,26 +1632,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
struct net_device *dev = skb->dev;
if (dev->master) {
- /*
- * On bonding slaves other than the currently active
- * slave, suppress duplicates except for 802.3ad
- * ETH_P_SLOW and alb non-mcast/bcast.
- */
- if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
- if (dev->master->priv_flags & IFF_MASTER_ALB) {
- if (skb->pkt_type != PACKET_BROADCAST &&
- skb->pkt_type != PACKET_MULTICAST)
- goto keep;
- }
-
- if (dev->master->priv_flags & IFF_MASTER_8023AD &&
- skb->protocol == __constant_htons(ETH_P_SLOW))
- goto keep;
-
+ if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
-keep:
skb->dev = dev->master;
}
@@ -1712,7 +1742,7 @@ static int ing_filter(struct sk_buff *skb)
if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
- printk("Redir loop detected Dropping packet (%s->%s)\n",
+ printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}
@@ -2907,7 +2937,7 @@ int register_netdevice(struct net_device *dev)
/* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) &&
!(dev->features & NETIF_F_ALL_CSUM)) {
- printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+ printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
}
@@ -2915,7 +2945,7 @@ int register_netdevice(struct net_device *dev)
/* TSO requires that SG is present as well. */
if ((dev->features & NETIF_F_TSO) &&
!(dev->features & NETIF_F_SG)) {
- printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
+ printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
dev->name);
dev->features &= ~NETIF_F_TSO;
}
@@ -3165,13 +3195,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
struct net_device *dev;
int alloc_size;
+ BUG_ON(strlen(name) >= sizeof(dev->name));
+
/* ensure 32-byte alignment of both the device and private area */
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
- printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+ printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
return NULL;
}
@@ -3386,12 +3418,9 @@ static void net_dma_rebalance(void)
unsigned int cpu, i, n;
struct dma_chan *chan;
- lock_cpu_hotplug();
-
if (net_dma_count == 0) {
for_each_online_cpu(cpu)
- rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
- unlock_cpu_hotplug();
+ rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
return;
}
@@ -3404,15 +3433,13 @@ static void net_dma_rebalance(void)
+ (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
while(n) {
- per_cpu(softnet_data.net_dma, cpu) = chan;
+ per_cpu(softnet_data, cpu).net_dma = chan;
cpu = next_cpu(cpu, cpu_online_map);
n--;
}
i++;
}
rcu_read_unlock();
-
- unlock_cpu_hotplug();
}
/**
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index c57d887da2ef..b22648d04d36 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -21,8 +21,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
-#include <linux/module.h>
+#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
diff --git a/net/core/dst.c b/net/core/dst.c
index 470c05bc4cb2..1a5e49da0e77 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy)
dst_gc_timer_inc = DST_GC_INC;
dst_gc_timer_expires = DST_GC_MIN;
}
- dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
printk("dst_total: %d/%d %ld\n",
atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
- add_timer(&dst_gc_timer);
+ mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
out:
spin_unlock(&dst_lock);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 27ce1683caf5..87dc556fd9d6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -143,7 +143,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
{
struct ethtool_drvinfo info;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
if (!ops->get_drvinfo)
return -EOPNOTSUPP;
@@ -169,7 +169,7 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
{
struct ethtool_regs regs;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
void *regbuf;
int reglen, ret;
@@ -282,7 +282,7 @@ static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
@@ -327,7 +327,7 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
@@ -437,7 +437,7 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam;
- if (!dev->ethtool_ops->get_pauseparam)
+ if (!dev->ethtool_ops->set_pauseparam)
return -EOPNOTSUPP;
if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
@@ -640,7 +640,7 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
@@ -673,7 +673,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gstrings gstrings;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
@@ -733,7 +733,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
{
struct ethtool_stats stats;
- struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
@@ -806,13 +806,6 @@ int dev_ethtool(struct ifreq *ifr)
int rc;
unsigned long old_features;
- /*
- * XXX: This can be pushed down into the ethtool_* handlers that
- * need it. Keep existing behaviour for the moment.
- */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
if (!dev || !netif_device_present(dev))
return -ENODEV;
@@ -822,6 +815,27 @@ int dev_ethtool(struct ifreq *ifr)
if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
return -EFAULT;
+ /* Allow some commands to be done by anyone */
+ switch(ethcmd) {
+ case ETHTOOL_GDRVINFO:
+ case ETHTOOL_GMSGLVL:
+ case ETHTOOL_GCOALESCE:
+ case ETHTOOL_GRINGPARAM:
+ case ETHTOOL_GPAUSEPARAM:
+ case ETHTOOL_GRXCSUM:
+ case ETHTOOL_GTXCSUM:
+ case ETHTOOL_GSG:
+ case ETHTOOL_GSTRINGS:
+ case ETHTOOL_GTSO:
+ case ETHTOOL_GPERMADDR:
+ case ETHTOOL_GUFO:
+ case ETHTOOL_GGSO:
+ break;
+ default:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ }
+
if(dev->ethtool_ops->begin)
if ((rc = dev->ethtool_ops->begin(dev)) < 0)
return rc;
@@ -947,6 +961,10 @@ int dev_ethtool(struct ifreq *ifr)
return rc;
ioctl:
+ /* Keep existing behaviour for the moment. */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (dev->do_ioctl)
return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
return -EOPNOTSUPP;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
new file mode 100644
index 000000000000..a99d87d82b7f
--- /dev/null
+++ b/net/core/fib_rules.c
@@ -0,0 +1,421 @@
+/*
+ * net/core/fib_rules.c Generic Routing Rules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+ struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+ struct fib_rules_ops *ops;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &rules_ops, list) {
+ if (ops->family == family) {
+ if (!try_module_get(ops->owner))
+ ops = NULL;
+ rcu_read_unlock();
+ return ops;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+ if (ops)
+ module_put(ops->owner);
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+ int err = -EEXIST;
+ struct fib_rules_ops *o;
+
+ if (ops->rule_size < sizeof(struct fib_rule))
+ return -EINVAL;
+
+ if (ops->match == NULL || ops->configure == NULL ||
+ ops->compare == NULL || ops->fill == NULL ||
+ ops->action == NULL)
+ return -EINVAL;
+
+ spin_lock(&rules_mod_lock);
+ list_for_each_entry(o, &rules_ops, list)
+ if (ops->family == o->family)
+ goto errout;
+
+ list_add_tail_rcu(&ops->list, &rules_ops);
+ err = 0;
+errout:
+ spin_unlock(&rules_mod_lock);
+
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+ struct fib_rule *rule, *tmp;
+
+ list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+ list_del_rcu(&rule->list);
+ fib_rule_put(rule);
+ }
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+ int err = 0;
+ struct fib_rules_ops *o;
+
+ spin_lock(&rules_mod_lock);
+ list_for_each_entry(o, &rules_ops, list) {
+ if (o == ops) {
+ list_del_rcu(&o->list);
+ cleanup_ops(ops);
+ goto out;
+ }
+ }
+
+ err = -ENOENT;
+out:
+ spin_unlock(&rules_mod_lock);
+
+ synchronize_rcu();
+
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+ int flags, struct fib_lookup_arg *arg)
+{
+ struct fib_rule *rule;
+ int err;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(rule, ops->rules_list, list) {
+ if (rule->ifindex && (rule->ifindex != fl->iif))
+ continue;
+
+ if (!ops->match(rule, fl, flags))
+ continue;
+
+ err = ops->action(rule, fl, flags, arg);
+ if (err != -EAGAIN) {
+ fib_rule_get(rule);
+ arg->rule = rule;
+ goto out;
+ }
+ }
+
+ err = -ENETUNREACH;
+out:
+ rcu_read_unlock();
+
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule, *r, *last = NULL;
+ struct nlattr *tb[FRA_MAX+1];
+ int err = -EINVAL;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ goto errout;
+
+ ops = lookup_rules_ops(frh->family);
+ if (ops == NULL) {
+ err = EAFNOSUPPORT;
+ goto errout;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ if (err < 0)
+ goto errout;
+
+ rule = kzalloc(ops->rule_size, GFP_KERNEL);
+ if (rule == NULL) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ if (tb[FRA_PRIORITY])
+ rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+ if (tb[FRA_IFNAME]) {
+ struct net_device *dev;
+
+ rule->ifindex = -1;
+ nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+ dev = __dev_get_by_name(rule->ifname);
+ if (dev)
+ rule->ifindex = dev->ifindex;
+ }
+
+ rule->action = frh->action;
+ rule->flags = frh->flags;
+ rule->table = frh_get_table(frh, tb);
+
+ if (!rule->pref && ops->default_pref)
+ rule->pref = ops->default_pref();
+
+ err = ops->configure(rule, skb, nlh, frh, tb);
+ if (err < 0)
+ goto errout_free;
+
+ list_for_each_entry(r, ops->rules_list, list) {
+ if (r->pref > rule->pref)
+ break;
+ last = r;
+ }
+
+ fib_rule_get(rule);
+
+ if (last)
+ list_add_rcu(&rule->list, &last->list);
+ else
+ list_add_rcu(&rule->list, ops->rules_list);
+
+ notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ rules_ops_put(ops);
+ return 0;
+
+errout_free:
+ kfree(rule);
+errout:
+ rules_ops_put(ops);
+ return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule;
+ struct nlattr *tb[FRA_MAX+1];
+ int err = -EINVAL;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ goto errout;
+
+ ops = lookup_rules_ops(frh->family);
+ if (ops == NULL) {
+ err = EAFNOSUPPORT;
+ goto errout;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ if (err < 0)
+ goto errout;
+
+ list_for_each_entry(rule, ops->rules_list, list) {
+ if (frh->action && (frh->action != rule->action))
+ continue;
+
+ if (frh->table && (frh_get_table(frh, tb) != rule->table))
+ continue;
+
+ if (tb[FRA_PRIORITY] &&
+ (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+ continue;
+
+ if (tb[FRA_IFNAME] &&
+ nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+ continue;
+
+ if (!ops->compare(rule, frh, tb))
+ continue;
+
+ if (rule->flags & FIB_RULE_PERMANENT) {
+ err = -EPERM;
+ goto errout;
+ }
+
+ list_del_rcu(&rule->list);
+ synchronize_rcu();
+ notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+ NETLINK_CB(skb).pid);
+ fib_rule_put(rule);
+ rules_ops_put(ops);
+ return 0;
+ }
+
+ err = -ENOENT;
+errout:
+ rules_ops_put(ops);
+ return err;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+ u32 pid, u32 seq, int type, int flags,
+ struct fib_rules_ops *ops)
+{
+ struct nlmsghdr *nlh;
+ struct fib_rule_hdr *frh;
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+ if (nlh == NULL)
+ return -1;
+
+ frh = nlmsg_data(nlh);
+ frh->table = rule->table;
+ NLA_PUT_U32(skb, FRA_TABLE, rule->table);
+ frh->res1 = 0;
+ frh->res2 = 0;
+ frh->action = rule->action;
+ frh->flags = rule->flags;
+
+ if (rule->ifname[0])
+ NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+ if (rule->pref)
+ NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+ if (ops->fill(rule, skb, nlh, frh) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+ int idx = 0;
+ struct fib_rule *rule;
+ struct fib_rules_ops *ops;
+
+ ops = lookup_rules_ops(family);
+ if (ops == NULL)
+ return -EAFNOSUPPORT;
+
+ rcu_read_lock();
+ list_for_each_entry(rule, ops->rules_list, list) {
+ if (idx < cb->args[0])
+ goto skip;
+
+ if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWRULE,
+ NLM_F_MULTI, ops) < 0)
+ break;
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+ rules_ops_put(ops);
+
+ return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+ struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ u32 pid)
+{
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+ err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(ops->nlgroup, err);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+ struct fib_rule *rule;
+
+ list_for_each_entry(rule, rules, list) {
+ if (rule->ifindex == -1 &&
+ strcmp(dev->name, rule->ifname) == 0)
+ rule->ifindex = dev->ifindex;
+ }
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+ struct fib_rule *rule;
+
+ list_for_each_entry(rule, rules, list)
+ if (rule->ifindex == dev->ifindex)
+ rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct fib_rules_ops *ops;
+
+ ASSERT_RTNL();
+ rcu_read_lock();
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ list_for_each_entry(ops, &rules_ops, list)
+ attach_rules(ops->rules_list, dev);
+ break;
+
+ case NETDEV_UNREGISTER:
+ list_for_each_entry(ops, &rules_ops, list)
+ detach_rules(ops->rules_list, dev);
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+ .notifier_call = fib_rules_event,
+};
+
+static int __init fib_rules_init(void)
+{
+ return register_netdevice_notifier(&fib_rules_notifier);
+}
+
+subsys_initcall(fib_rules_init);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b4486a60cf6..6732782a5a40 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (!err) {
struct sk_filter *old_fp;
- spin_lock_bh(&sk->sk_lock.slock);
- old_fp = sk->sk_filter;
- sk->sk_filter = fp;
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_read_lock_bh();
+ old_fp = rcu_dereference(sk->sk_filter);
+ rcu_assign_pointer(sk->sk_filter, fp);
+ rcu_read_unlock_bh();
fp = old_fp;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 2191af5f26ac..f23e7e386543 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -32,7 +32,6 @@ struct flow_cache_entry {
u8 dir;
struct flowi key;
u32 genid;
- u32 sk_sid;
void *object;
atomic_t *object_ref;
};
@@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
return 0;
}
-void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver)
{
struct flow_cache_entry *fle, **head;
@@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
for (fle = *head; fle; fle = fle->next) {
if (fle->family == family &&
fle->dir == dir &&
- fle->sk_sid == sk_sid &&
flow_key_compare(key, &fle->key) == 0) {
if (fle->genid == atomic_read(&flow_cache_genid)) {
void *ret = fle->object;
@@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
*head = fle;
fle->family = family;
fle->dir = dir;
- fle->sk_sid = sk_sid;
memcpy(&fle->key, key, sizeof(*key));
fle->object = NULL;
flow_count(cpu)++;
@@ -226,7 +223,7 @@ nocache:
void *obj;
atomic_t *obj_ref;
- resolver(key, sk_sid, family, dir, &obj, &obj_ref);
+ resolver(key, family, dir, &obj, &obj_ref);
if (fle) {
fle->genid = atomic_read(&flow_cache_genid);
@@ -346,12 +343,8 @@ static int __init flow_cache_init(void)
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
-
- if (!flow_cachep)
- panic("NET: failed to allocate flow cache slab\n");
-
flow_hash_shift = 10;
flow_lwm = 2 * flow_hash_size;
flow_hwm = 4 * flow_hash_size;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 0f37266411b5..4b36114744c5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -11,7 +11,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 50a8c73caf97..8ce8c471d868 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -15,7 +15,6 @@
* Harald Welte Add neighbour cache statistics like rtstat
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -30,6 +29,8 @@
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
@@ -755,6 +756,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
+ notify = 1;
}
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
@@ -763,6 +765,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
+ notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
@@ -820,6 +823,8 @@ static void neigh_timer_handler(unsigned long arg)
out:
write_unlock(&neigh->lock);
}
+ if (notify)
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
#ifdef CONFIG_ARPD
if (notify && neigh->parms->app_probes)
@@ -884,7 +889,7 @@ out_unlock_bh:
return rc;
}
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -927,9 +932,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
{
u8 old;
int err;
-#ifdef CONFIG_ARPD
int notify = 0;
-#endif
struct net_device *dev;
int update_isrouter = 0;
@@ -949,9 +952,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh_suspect(neigh);
neigh->nud_state = new;
err = 0;
-#ifdef CONFIG_ARPD
notify = old & NUD_VALID;
-#endif
goto out;
}
@@ -1023,9 +1024,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies -
(neigh->parms->base_reachable_time << 1);
-#ifdef CONFIG_ARPD
notify = 1;
-#endif
}
if (new == old)
goto out;
@@ -1057,6 +1056,9 @@ out:
(neigh->flags & ~NTF_ROUTER);
}
write_unlock_bh(&neigh->lock);
+
+ if (notify)
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
#ifdef CONFIG_ARPD
if (notify && neigh->parms->app_probes)
neigh_app_notify(neigh);
@@ -1077,7 +1079,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
- u16 protocol)
+ __be16 protocol)
{
struct hh_cache *hh;
struct net_device *dev = dst->dev;
@@ -1337,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
neigh_rand_reach_time(tbl->parms.base_reachable_time);
if (!tbl->kmem_cachep)
- tbl->kmem_cachep = kmem_cache_create(tbl->id,
- tbl->entry_size,
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
-
- if (!tbl->kmem_cachep)
- panic("cannot create neighbour cache");
-
+ tbl->kmem_cachep =
+ kmem_cache_create(tbl->id, tbl->entry_size, 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ NULL, NULL);
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
panic("cannot create neighbour cache statistics");
@@ -1431,53 +1429,70 @@ int neigh_table_clear(struct neigh_table *tbl)
kfree(tbl->phash_buckets);
tbl->phash_buckets = NULL;
+ free_percpu(tbl->stats);
+ tbl->stats = NULL;
+
return 0;
}
int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct ndmsg *ndm = NLMSG_DATA(nlh);
- struct rtattr **nda = arg;
+ struct ndmsg *ndm;
+ struct nlattr *dst_attr;
struct neigh_table *tbl;
struct net_device *dev = NULL;
- int err = -ENODEV;
+ int err = -EINVAL;
+
+ if (nlmsg_len(nlh) < sizeof(*ndm))
+ goto out;
- if (ndm->ndm_ifindex &&
- (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+ if (dst_attr == NULL)
goto out;
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index(ndm->ndm_ifindex);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto out;
+ }
+ }
+
read_lock(&neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- struct rtattr *dst_attr = nda[NDA_DST - 1];
- struct neighbour *n;
+ struct neighbour *neigh;
if (tbl->family != ndm->ndm_family)
continue;
read_unlock(&neigh_tbl_lock);
- err = -EINVAL;
- if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+ if (nla_len(dst_attr) < tbl->key_len)
goto out_dev_put;
if (ndm->ndm_flags & NTF_PROXY) {
- err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+ err = pneigh_delete(tbl, nla_data(dst_attr), dev);
goto out_dev_put;
}
- if (!dev)
- goto out;
+ if (dev == NULL)
+ goto out_dev_put;
- n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
- if (n) {
- err = neigh_update(n, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_ADMIN);
- neigh_release(n);
+ neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+ if (neigh == NULL) {
+ err = -ENOENT;
+ goto out_dev_put;
}
+
+ err = neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_ADMIN);
+ neigh_release(neigh);
goto out_dev_put;
}
read_unlock(&neigh_tbl_lock);
- err = -EADDRNOTAVAIL;
+ err = -EAFNOSUPPORT;
+
out_dev_put:
if (dev)
dev_put(dev);
@@ -1487,76 +1502,93 @@ out:
int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct ndmsg *ndm = NLMSG_DATA(nlh);
- struct rtattr **nda = arg;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
struct net_device *dev = NULL;
- int err = -ENODEV;
+ int err;
- if (ndm->ndm_ifindex &&
- (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ if (err < 0)
goto out;
+ err = -EINVAL;
+ if (tb[NDA_DST] == NULL)
+ goto out;
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index(ndm->ndm_ifindex);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+ goto out_dev_put;
+ }
+
read_lock(&neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
- struct rtattr *dst_attr = nda[NDA_DST - 1];
- int override = 1;
- struct neighbour *n;
+ int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+ struct neighbour *neigh;
+ void *dst, *lladdr;
if (tbl->family != ndm->ndm_family)
continue;
read_unlock(&neigh_tbl_lock);
- err = -EINVAL;
- if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < tbl->key_len)
goto out_dev_put;
+ dst = nla_data(tb[NDA_DST]);
+ lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
if (ndm->ndm_flags & NTF_PROXY) {
+ struct pneigh_entry *pn;
+
err = -ENOBUFS;
- if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
+ pn = pneigh_lookup(tbl, dst, dev, 1);
+ if (pn) {
+ pn->flags = ndm->ndm_flags;
err = 0;
+ }
goto out_dev_put;
}
- err = -EINVAL;
- if (!dev)
- goto out;
- if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+ if (dev == NULL)
goto out_dev_put;
+
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (neigh == NULL) {
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
+ goto out_dev_put;
+ }
- n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
- if (n) {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- err = -EEXIST;
- neigh_release(n);
+ neigh = __neigh_lookup_errno(tbl, dst, dev);
+ if (IS_ERR(neigh)) {
+ err = PTR_ERR(neigh);
goto out_dev_put;
}
-
- override = nlh->nlmsg_flags & NLM_F_REPLACE;
- } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- err = -ENOENT;
- goto out_dev_put;
} else {
- n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
- if (IS_ERR(n)) {
- err = PTR_ERR(n);
+ if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ err = -EEXIST;
+ neigh_release(neigh);
goto out_dev_put;
}
- }
- err = neigh_update(n,
- lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
- ndm->ndm_state,
- (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
- NEIGH_UPDATE_F_ADMIN);
+ if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+ flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+ }
- neigh_release(n);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ neigh_release(neigh);
goto out_dev_put;
}
read_unlock(&neigh_tbl_lock);
- err = -EADDRNOTAVAIL;
+ err = -EAFNOSUPPORT;
+
out_dev_put:
if (dev)
dev_put(dev);
@@ -1566,56 +1598,59 @@ out:
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
- struct rtattr *nest = NULL;
-
- nest = RTA_NEST(skb, NDTA_PARMS);
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NDTA_PARMS);
+ if (nest == NULL)
+ return -ENOBUFS;
if (parms->dev)
- RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
-
- RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
- RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
- RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
- RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
- RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
- RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
- RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
- RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+ NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+ NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+ NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+ NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+ NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+ NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+ NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+ NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+ NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
parms->base_reachable_time);
- RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
- RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
- RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
- RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
- RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
- RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+ NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+ NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+ NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+ NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+ NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+ NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
- return RTA_NEST_END(skb, nest);
+ return nla_nest_end(skb, nest);
-rtattr_failure:
- return RTA_NEST_CANCEL(skb, nest);
+nla_put_failure:
+ return nla_nest_cancel(skb, nest);
}
-static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+ u32 pid, u32 seq, int type, int flags)
{
struct nlmsghdr *nlh;
struct ndtmsg *ndtmsg;
- nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
- NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
- ndtmsg = NLMSG_DATA(nlh);
+ ndtmsg = nlmsg_data(nlh);
read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
- RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
- RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
- RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
- RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
- RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+ NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+ NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+ NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+ NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+ NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
{
unsigned long now = jiffies;
@@ -1634,7 +1669,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};
- RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+ NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
}
{
@@ -1659,55 +1694,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
ndst.ndts_forced_gc_runs += st->forced_gc_runs;
}
- RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+ NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
}
BUG_ON(tbl->parms.dev);
if (neightbl_fill_parms(skb, &tbl->parms) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
read_unlock_bh(&tbl->lock);
- return NLMSG_END(skb, nlh);
+ return nlmsg_end(skb, nlh);
-rtattr_failure:
+nla_put_failure:
read_unlock_bh(&tbl->lock);
- return NLMSG_CANCEL(skb, nlh);
-
-nlmsg_failure:
- return -1;
+ return nlmsg_cancel(skb, nlh);
}
-static int neightbl_fill_param_info(struct neigh_table *tbl,
+static int neightbl_fill_param_info(struct sk_buff *skb,
+ struct neigh_table *tbl,
struct neigh_parms *parms,
- struct sk_buff *skb,
- struct netlink_callback *cb)
+ u32 pid, u32 seq, int type,
+ unsigned int flags)
{
struct ndtmsg *ndtmsg;
struct nlmsghdr *nlh;
- nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
- NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
- ndtmsg = NLMSG_DATA(nlh);
+ ndtmsg = nlmsg_data(nlh);
read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
- RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
- if (neightbl_fill_parms(skb, parms) < 0)
- goto rtattr_failure;
+ if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+ neightbl_fill_parms(skb, parms) < 0)
+ goto errout;
read_unlock_bh(&tbl->lock);
- return NLMSG_END(skb, nlh);
-
-rtattr_failure:
+ return nlmsg_end(skb, nlh);
+errout:
read_unlock_bh(&tbl->lock);
- return NLMSG_CANCEL(skb, nlh);
-
-nlmsg_failure:
- return -1;
+ return nlmsg_cancel(skb, nlh);
}
static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
@@ -1723,28 +1753,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
return NULL;
}
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+ [NDTA_NAME] = { .type = NLA_STRING },
+ [NDTA_THRESH1] = { .type = NLA_U32 },
+ [NDTA_THRESH2] = { .type = NLA_U32 },
+ [NDTA_THRESH3] = { .type = NLA_U32 },
+ [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
+ [NDTA_PARMS] = { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+ [NDTPA_IFINDEX] = { .type = NLA_U32 },
+ [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
+ [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
+ [NDTPA_APP_PROBES] = { .type = NLA_U32 },
+ [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
+ [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
+ [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
+ [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
+ [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
+ [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
+ [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
+ [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
+ [NDTPA_LOCKTIME] = { .type = NLA_U64 },
+};
+
int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct neigh_table *tbl;
- struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
- struct rtattr **tb = arg;
- int err = -EINVAL;
+ struct ndtmsg *ndtmsg;
+ struct nlattr *tb[NDTA_MAX+1];
+ int err;
- if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
- return -EINVAL;
+ err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+ nl_neightbl_policy);
+ if (err < 0)
+ goto errout;
+
+ if (tb[NDTA_NAME] == NULL) {
+ err = -EINVAL;
+ goto errout;
+ }
+ ndtmsg = nlmsg_data(nlh);
read_lock(&neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl->next) {
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
continue;
- if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+ if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
break;
}
if (tbl == NULL) {
err = -ENOENT;
- goto errout;
+ goto errout_locked;
}
/*
@@ -1753,165 +1816,178 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
*/
write_lock_bh(&tbl->lock);
- if (tb[NDTA_THRESH1 - 1])
- tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
-
- if (tb[NDTA_THRESH2 - 1])
- tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
-
- if (tb[NDTA_THRESH3 - 1])
- tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
-
- if (tb[NDTA_GC_INTERVAL - 1])
- tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
-
- if (tb[NDTA_PARMS - 1]) {
- struct rtattr *tbp[NDTPA_MAX];
+ if (tb[NDTA_PARMS]) {
+ struct nlattr *tbp[NDTPA_MAX+1];
struct neigh_parms *p;
- u32 ifindex = 0;
+ int i, ifindex = 0;
- if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
- goto rtattr_failure;
+ err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+ nl_ntbl_parm_policy);
+ if (err < 0)
+ goto errout_tbl_lock;
- if (tbp[NDTPA_IFINDEX - 1])
- ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+ if (tbp[NDTPA_IFINDEX])
+ ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
p = lookup_neigh_params(tbl, ifindex);
if (p == NULL) {
err = -ENOENT;
- goto rtattr_failure;
+ goto errout_tbl_lock;
}
-
- if (tbp[NDTPA_QUEUE_LEN - 1])
- p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
-
- if (tbp[NDTPA_PROXY_QLEN - 1])
- p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
-
- if (tbp[NDTPA_APP_PROBES - 1])
- p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
-
- if (tbp[NDTPA_UCAST_PROBES - 1])
- p->ucast_probes =
- RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
- if (tbp[NDTPA_MCAST_PROBES - 1])
- p->mcast_probes =
- RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
-
- if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
- p->base_reachable_time =
- RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+ for (i = 1; i <= NDTPA_MAX; i++) {
+ if (tbp[i] == NULL)
+ continue;
- if (tbp[NDTPA_GC_STALETIME - 1])
- p->gc_staletime =
- RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+ switch (i) {
+ case NDTPA_QUEUE_LEN:
+ p->queue_len = nla_get_u32(tbp[i]);
+ break;
+ case NDTPA_PROXY_QLEN:
+ p->proxy_qlen = nla_get_u32(tbp[i]);
+ break;
+ case NDTPA_APP_PROBES:
+ p->app_probes = nla_get_u32(tbp[i]);
+ break;
+ case NDTPA_UCAST_PROBES:
+ p->ucast_probes = nla_get_u32(tbp[i]);
+ break;
+ case NDTPA_MCAST_PROBES:
+ p->mcast_probes = nla_get_u32(tbp[i]);
+ break;
+ case NDTPA_BASE_REACHABLE_TIME:
+ p->base_reachable_time = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_GC_STALETIME:
+ p->gc_staletime = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_DELAY_PROBE_TIME:
+ p->delay_probe_time = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_RETRANS_TIME:
+ p->retrans_time = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_ANYCAST_DELAY:
+ p->anycast_delay = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_PROXY_DELAY:
+ p->proxy_delay = nla_get_msecs(tbp[i]);
+ break;
+ case NDTPA_LOCKTIME:
+ p->locktime = nla_get_msecs(tbp[i]);
+ break;
+ }
+ }
+ }
- if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
- p->delay_probe_time =
- RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+ if (tb[NDTA_THRESH1])
+ tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
- if (tbp[NDTPA_RETRANS_TIME - 1])
- p->retrans_time =
- RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+ if (tb[NDTA_THRESH2])
+ tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
- if (tbp[NDTPA_ANYCAST_DELAY - 1])
- p->anycast_delay =
- RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+ if (tb[NDTA_THRESH3])
+ tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
- if (tbp[NDTPA_PROXY_DELAY - 1])
- p->proxy_delay =
- RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
-
- if (tbp[NDTPA_LOCKTIME - 1])
- p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
- }
+ if (tb[NDTA_GC_INTERVAL])
+ tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
err = 0;
-rtattr_failure:
+errout_tbl_lock:
write_unlock_bh(&tbl->lock);
-errout:
+errout_locked:
read_unlock(&neigh_tbl_lock);
+errout:
return err;
}
int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
- int idx, family;
- int s_idx = cb->args[0];
+ int family, tidx, nidx = 0;
+ int tbl_skip = cb->args[0];
+ int neigh_skip = cb->args[1];
struct neigh_table *tbl;
- family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+ family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+ for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
struct neigh_parms *p;
- if (idx < s_idx || (family && tbl->family != family))
+ if (tidx < tbl_skip || (family && tbl->family != family))
continue;
- if (neightbl_fill_info(tbl, skb, cb) <= 0)
+ if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+ NLM_F_MULTI) <= 0)
break;
- for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
- if (idx < s_idx)
+ for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+ if (nidx < neigh_skip)
continue;
- if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+ if (neightbl_fill_param_info(skb, tbl, p,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGHTBL,
+ NLM_F_MULTI) <= 0)
goto out;
}
+ neigh_skip = 0;
}
out:
read_unlock(&neigh_tbl_lock);
- cb->args[0] = idx;
+ cb->args[0] = tidx;
+ cb->args[1] = nidx;
return skb->len;
}
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
- u32 pid, u32 seq, int event, unsigned int flags)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+ u32 pid, u32 seq, int type, unsigned int flags)
{
unsigned long now = jiffies;
- unsigned char *b = skb->tail;
struct nda_cacheinfo ci;
- int locked = 0;
- u32 probes;
- struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
- sizeof(struct ndmsg), flags);
- struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
- ndm->ndm_family = n->ops->family;
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = n->flags;
- ndm->ndm_type = n->type;
- ndm->ndm_ifindex = n->dev->ifindex;
- RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
- read_lock_bh(&n->lock);
- locked = 1;
- ndm->ndm_state = n->nud_state;
- if (n->nud_state & NUD_VALID)
- RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
- ci.ndm_used = now - n->used;
- ci.ndm_confirmed = now - n->confirmed;
- ci.ndm_updated = now - n->updated;
- ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
- probes = atomic_read(&n->probes);
- read_unlock_bh(&n->lock);
- locked = 0;
- RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
- RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ ndm->ndm_flags = neigh->flags;
+ ndm->ndm_type = neigh->type;
+ ndm->ndm_ifindex = neigh->dev->ifindex;
+
+ NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
-nlmsg_failure:
-rtattr_failure:
- if (locked)
- read_unlock_bh(&n->lock);
- skb_trim(skb, b - skb->data);
- return -1;
+ read_lock_bh(&neigh->lock);
+ ndm->ndm_state = neigh->nud_state;
+ if ((neigh->nud_state & NUD_VALID) &&
+ nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
+ read_unlock_bh(&neigh->lock);
+ goto nla_put_failure;
+ }
+
+ ci.ndm_used = now - neigh->used;
+ ci.ndm_confirmed = now - neigh->confirmed;
+ ci.ndm_updated = now - neigh->updated;
+ ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
+ read_unlock_bh(&neigh->lock);
+
+ NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+ NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
@@ -1955,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
int t, family, s_t;
read_lock(&neigh_tbl_lock);
- family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+ family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
s_t = cb->args[0];
for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -2334,41 +2410,35 @@ static struct file_operations neigh_stat_seq_fops = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_ARPD
-void neigh_app_ns(struct neighbour *n)
+static void __neigh_notify(struct neighbour *n, int type, int flags)
{
- struct nlmsghdr *nlh;
- int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
- struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
- if (!skb)
- return;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
- if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
+ err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ if (err < 0) {
kfree_skb(skb);
- return;
+ goto errout;
}
- nlh = (struct nlmsghdr *)skb->data;
- nlh->nlmsg_flags = NLM_F_REQUEST;
- NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_NEIGH, err);
}
-static void neigh_app_notify(struct neighbour *n)
+void neigh_app_ns(struct neighbour *n)
{
- struct nlmsghdr *nlh;
- int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
- struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
-
- if (!skb)
- return;
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+}
- if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
- kfree_skb(skb);
- return;
- }
- nlh = (struct nlmsghdr *)skb->data;
- NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+static void neigh_app_notify(struct neighbour *n)
+{
+ __neigh_notify(n, RTM_NEWNEIGH, 0);
}
#endif /* CONFIG_ARPD */
@@ -2382,7 +2452,7 @@ static struct neigh_sysctl_table {
ctl_table neigh_neigh_dir[2];
ctl_table neigh_proto_dir[2];
ctl_table neigh_root_dir[2];
-} neigh_sysctl_template = {
+} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
{
.ctl_name = NET_NEIGH_MCAST_SOLICIT,
@@ -2655,7 +2725,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
#endif /* CONFIG_SYSCTL */
EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_add);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
@@ -2675,11 +2744,8 @@ EXPORT_SYMBOL(neigh_table_clear);
EXPORT_SYMBOL(neigh_table_init);
EXPORT_SYMBOL(neigh_table_init_no_netlink);
EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(neigh_update_hhs);
EXPORT_SYMBOL(pneigh_enqueue);
EXPORT_SYMBOL(pneigh_lookup);
-EXPORT_SYMBOL(neightbl_dump_info);
-EXPORT_SYMBOL(neightbl_set);
#ifdef CONFIG_ARPD
EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 47a6fceb6771..f47f319bb7dc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -10,7 +10,6 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
@@ -345,8 +344,6 @@ static ssize_t wireless_show(struct class_device *cd, char *buf,
if(dev->wireless_handlers &&
dev->wireless_handlers->get_wireless_stats)
iw = dev->wireless_handlers->get_wireless_stats(dev);
- else if (dev->get_wireless_stats)
- iw = dev->get_wireless_stats(dev);
if (iw != NULL)
ret = (*format)(iw, buf);
}
@@ -466,8 +463,7 @@ int netdev_register_sysfs(struct net_device *net)
*groups++ = &netstat_group;
#ifdef WIRELESS_EXT
- if (net->get_wireless_stats
- || (net->wireless_handlers && net->wireless_handlers->get_wireless_stats))
+ if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
diff --git a/net/core/netevent.c b/net/core/netevent.c
new file mode 100644
index 000000000000..35d02c38554e
--- /dev/null
+++ b/net/core/netevent.c
@@ -0,0 +1,69 @@
+/*
+ * Network event notifiers
+ *
+ * Authors:
+ * Tom Tucker <tom@opengridcomputing.com>
+ * Steve Wise <swise@opengridcomputing.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ */
+
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
+
+/**
+ * register_netevent_notifier - register a netevent notifier block
+ * @nb: notifier
+ *
+ * Register a notifier to be called when a netevent occurs.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ */
+int register_netevent_notifier(struct notifier_block *nb)
+{
+ int err;
+
+ err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
+ return err;
+}
+
+/**
+ * netevent_unregister_notifier - unregister a netevent notifier block
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_neigh_notifier(). The notifier is unlinked into the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
+ */
+
+int unregister_netevent_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
+}
+
+/**
+ * call_netevent_notifiers - call all netevent notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @v: pointer passed unmodified to notifier function
+ *
+ * Call all neighbour notifier blocks. Parameters and return value
+ * are as for notifier_call_chain().
+ */
+
+int call_netevent_notifiers(unsigned long val, void *v)
+{
+ return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
+}
+
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
+EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9cb781830380..ead5920c26d6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -54,6 +54,7 @@ static atomic_t trapped;
sizeof(struct iphdr) + sizeof(struct ethhdr))
static void zap_completion_queue(void);
+static void arp_reply(struct sk_buff *skb);
static void queue_process(void *p)
{
@@ -109,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
- if (skb->ip_summed == CHECKSUM_HW &&
+ if (skb->ip_summed == CHECKSUM_COMPLETE &&
!(u16)csum_fold(csum_add(psum, skb->csum)))
return 0;
@@ -153,6 +154,22 @@ static void poll_napi(struct netpoll *np)
}
}
+static void service_arp_queue(struct netpoll_info *npi)
+{
+ struct sk_buff *skb;
+
+ if (unlikely(!npi))
+ return;
+
+ skb = skb_dequeue(&npi->arp_tx);
+
+ while (skb != NULL) {
+ arp_reply(skb);
+ skb = skb_dequeue(&npi->arp_tx);
+ }
+ return;
+}
+
void netpoll_poll(struct netpoll *np)
{
if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
@@ -163,6 +180,8 @@ void netpoll_poll(struct netpoll *np)
if (np->dev->poll)
poll_napi(np);
+ service_arp_queue(np->dev->npinfo);
+
zap_completion_queue();
}
@@ -279,14 +298,10 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
* network drivers do not expect to be called if the queue is
* stopped.
*/
- if (netif_queue_stopped(np->dev)) {
- netif_tx_unlock(np->dev);
- netpoll_poll(np);
- udelay(50);
- continue;
- }
+ status = NETDEV_TX_BUSY;
+ if (!netif_queue_stopped(np->dev))
+ status = np->dev->hard_start_xmit(skb, np->dev);
- status = np->dev->hard_start_xmit(skb, np->dev);
netif_tx_unlock(np->dev);
/* success */
@@ -446,7 +461,9 @@ int __netpoll_rx(struct sk_buff *skb)
int proto, len, ulen;
struct iphdr *iph;
struct udphdr *uh;
- struct netpoll *np = skb->dev->npinfo->rx_np;
+ struct netpoll_info *npi = skb->dev->npinfo;
+ struct netpoll *np = npi->rx_np;
+
if (!np)
goto out;
@@ -456,7 +473,7 @@ int __netpoll_rx(struct sk_buff *skb)
/* check if netpoll clients need ARP */
if (skb->protocol == __constant_htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
- arp_reply(skb);
+ skb_queue_tail(&npi->arp_tx, skb);
return 1;
}
@@ -651,6 +668,7 @@ int netpoll_setup(struct netpoll *np)
npinfo->poll_owner = -1;
npinfo->tries = MAX_RETRIES;
spin_lock_init(&npinfo->rx_lock);
+ skb_queue_head_init(&npinfo->arp_tx);
} else
npinfo = ndev->npinfo;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 67ed14ddabd2..dd023fd28304 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -109,6 +109,8 @@
*
* MPLS support by Steven Whitehouse <steve@chygwyn.com>
*
+ * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com>
+ *
*/
#include <linux/sys.h>
#include <linux/types.h>
@@ -137,6 +139,7 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -157,7 +160,7 @@
#include <asm/div64.h> /* do_div */
#include <asm/timex.h>
-#define VERSION "pktgen v2.67: Packet Generator for packet performance testing.\n"
+#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
/* #define PG_DEBUG(a) a */
#define PG_DEBUG(a)
@@ -178,6 +181,8 @@
#define F_TXSIZE_RND (1<<6) /* Transmit size is random */
#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */
#define F_MPLS_RND (1<<8) /* Random MPLS labels */
+#define F_VID_RND (1<<9) /* Random VLAN ID */
+#define F_SVID_RND (1<<10) /* Random SVLAN ID */
/* Thread control flag bits */
#define T_TERMINATE (1<<0)
@@ -198,6 +203,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
#define MAX_CFLOWS 65536
+#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4)
+#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
+
struct flow_state {
__u32 cur_daddr;
int count;
@@ -284,10 +292,23 @@ struct pktgen_dev {
__u16 udp_dst_min; /* inclusive, dest UDP port */
__u16 udp_dst_max; /* exclusive, dest UDP port */
+ /* DSCP + ECN */
+ __u8 tos; /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */
+ __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */
+
/* MPLS */
unsigned nr_labels; /* Depth of stack, 0 = no MPLS */
__be32 labels[MAX_MPLS_LABELS];
+ /* VLAN/SVLAN (802.1Q/Q-in-Q) */
+ __u8 vlan_p;
+ __u8 vlan_cfi;
+ __u16 vlan_id; /* 0xffff means no vlan tag */
+
+ __u8 svlan_p;
+ __u8 svlan_cfi;
+ __u16 svlan_id; /* 0xffff means no svlan tag */
+
__u32 src_mac_count; /* How many MACs to iterate through */
__u32 dst_mac_count; /* How many MACs to iterate through */
@@ -644,6 +665,24 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
i == pkt_dev->nr_labels-1 ? "\n" : ", ");
}
+ if (pkt_dev->vlan_id != 0xffff) {
+ seq_printf(seq, " vlan_id: %u vlan_p: %u vlan_cfi: %u\n",
+ pkt_dev->vlan_id, pkt_dev->vlan_p, pkt_dev->vlan_cfi);
+ }
+
+ if (pkt_dev->svlan_id != 0xffff) {
+ seq_printf(seq, " svlan_id: %u vlan_p: %u vlan_cfi: %u\n",
+ pkt_dev->svlan_id, pkt_dev->svlan_p, pkt_dev->svlan_cfi);
+ }
+
+ if (pkt_dev->tos) {
+ seq_printf(seq, " tos: 0x%02x\n", pkt_dev->tos);
+ }
+
+ if (pkt_dev->traffic_class) {
+ seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ }
+
seq_printf(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -673,6 +712,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_MACDST_RND)
seq_printf(seq, "MACDST_RND ");
+ if (pkt_dev->flags & F_VID_RND)
+ seq_printf(seq, "VID_RND ");
+
+ if (pkt_dev->flags & F_SVID_RND)
+ seq_printf(seq, "SVID_RND ");
+
seq_puts(seq, "\n");
sa = pkt_dev->started_at;
@@ -715,12 +760,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
}
-static int hex32_arg(const char __user *user_buffer, __u32 *num)
+static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32 *num)
{
int i = 0;
*num = 0;
- for(; i < 8; i++) {
+ for(; i < maxlen; i++) {
char c;
*num <<= 4;
if (get_user(c, &user_buffer[i]))
@@ -815,7 +860,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
pkt_dev->nr_labels = 0;
do {
__u32 tmp;
- len = hex32_arg(&buffer[i], &tmp);
+ len = hex32_arg(&buffer[i], 8, &tmp);
if (len <= 0)
return len;
pkt_dev->labels[n] = htonl(tmp);
@@ -1140,11 +1185,27 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!MPLS_RND") == 0)
pkt_dev->flags &= ~F_MPLS_RND;
+ else if (strcmp(f, "VID_RND") == 0)
+ pkt_dev->flags |= F_VID_RND;
+
+ else if (strcmp(f, "!VID_RND") == 0)
+ pkt_dev->flags &= ~F_VID_RND;
+
+ else if (strcmp(f, "SVID_RND") == 0)
+ pkt_dev->flags |= F_SVID_RND;
+
+ else if (strcmp(f, "!SVID_RND") == 0)
+ pkt_dev->flags &= ~F_SVID_RND;
+
+ else if (strcmp(f, "!IPV6") == 0)
+ pkt_dev->flags &= ~F_IPV6;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
- "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n");
+ "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -1445,6 +1506,160 @@ static ssize_t pktgen_if_write(struct file *file,
offset += sprintf(pg_result + offset,
"%08x%s", ntohl(pkt_dev->labels[n]),
n == pkt_dev->nr_labels-1 ? "" : ",");
+
+ if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) {
+ pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+ pkt_dev->svlan_id = 0xffff;
+
+ if (debug)
+ printk("pktgen: VLAN/SVLAN auto turned off\n");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "vlan_id")) {
+ len = num_arg(&user_buffer[i], 4, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if (value <= 4095) {
+ pkt_dev->vlan_id = value; /* turn on VLAN */
+
+ if (debug)
+ printk("pktgen: VLAN turned on\n");
+
+ if (debug && pkt_dev->nr_labels)
+ printk("pktgen: MPLS auto turned off\n");
+
+ pkt_dev->nr_labels = 0; /* turn off MPLS */
+ sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id);
+ } else {
+ pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+ pkt_dev->svlan_id = 0xffff;
+
+ if (debug)
+ printk("pktgen: VLAN/SVLAN turned off\n");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "vlan_p")) {
+ len = num_arg(&user_buffer[i], 1, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) {
+ pkt_dev->vlan_p = value;
+ sprintf(pg_result, "OK: vlan_p=%u", pkt_dev->vlan_p);
+ } else {
+ sprintf(pg_result, "ERROR: vlan_p must be 0-7");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "vlan_cfi")) {
+ len = num_arg(&user_buffer[i], 1, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) {
+ pkt_dev->vlan_cfi = value;
+ sprintf(pg_result, "OK: vlan_cfi=%u", pkt_dev->vlan_cfi);
+ } else {
+ sprintf(pg_result, "ERROR: vlan_cfi must be 0-1");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "svlan_id")) {
+ len = num_arg(&user_buffer[i], 4, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) {
+ pkt_dev->svlan_id = value; /* turn on SVLAN */
+
+ if (debug)
+ printk("pktgen: SVLAN turned on\n");
+
+ if (debug && pkt_dev->nr_labels)
+ printk("pktgen: MPLS auto turned off\n");
+
+ pkt_dev->nr_labels = 0; /* turn off MPLS */
+ sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id);
+ } else {
+ pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+ pkt_dev->svlan_id = 0xffff;
+
+ if (debug)
+ printk("pktgen: VLAN/SVLAN turned off\n");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "svlan_p")) {
+ len = num_arg(&user_buffer[i], 1, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) {
+ pkt_dev->svlan_p = value;
+ sprintf(pg_result, "OK: svlan_p=%u", pkt_dev->svlan_p);
+ } else {
+ sprintf(pg_result, "ERROR: svlan_p must be 0-7");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "svlan_cfi")) {
+ len = num_arg(&user_buffer[i], 1, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) {
+ pkt_dev->svlan_cfi = value;
+ sprintf(pg_result, "OK: svlan_cfi=%u", pkt_dev->svlan_cfi);
+ } else {
+ sprintf(pg_result, "ERROR: svlan_cfi must be 0-1");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "tos")) {
+ __u32 tmp_value = 0;
+ len = hex32_arg(&user_buffer[i], 2, &tmp_value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if (len == 2) {
+ pkt_dev->tos = tmp_value;
+ sprintf(pg_result, "OK: tos=0x%02x", pkt_dev->tos);
+ } else {
+ sprintf(pg_result, "ERROR: tos must be 00-ff");
+ }
+ return count;
+ }
+
+ if (!strcmp(name, "traffic_class")) {
+ __u32 tmp_value = 0;
+ len = hex32_arg(&user_buffer[i], 2, &tmp_value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ if (len == 2) {
+ pkt_dev->traffic_class = tmp_value;
+ sprintf(pg_result, "OK: traffic_class=0x%02x", pkt_dev->traffic_class);
+ } else {
+ sprintf(pg_result, "ERROR: traffic_class must be 00-ff");
+ }
return count;
}
@@ -1786,7 +2001,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
* use ipv6_get_lladdr if/when it's get exported
*/
- read_lock(&addrconf_lock);
+ rcu_read_lock();
if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
struct inet6_ifaddr *ifp;
@@ -1805,7 +2020,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
}
read_unlock_bh(&idev->lock);
}
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
if (err)
printk("pktgen: ERROR: IPv6 link address not availble.\n");
}
@@ -1949,6 +2164,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
htonl(0x000fffff));
}
+ if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
+ pkt_dev->vlan_id = pktgen_random() % 4096;
+ }
+
+ if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
+ pkt_dev->svlan_id = pktgen_random() % 4096;
+ }
+
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
pkt_dev->cur_udp_src =
@@ -2092,10 +2315,18 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct pktgen_hdr *pgh = NULL;
__be16 protocol = __constant_htons(ETH_P_IP);
__be32 *mpls;
+ __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
+ __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
+ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
+ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+
if (pkt_dev->nr_labels)
protocol = __constant_htons(ETH_P_MPLS_UC);
+ if (pkt_dev->vlan_id != 0xffff)
+ protocol = __constant_htons(ETH_P_8021Q);
+
/* Update any of the values, used when we're incrementing various
* fields.
*/
@@ -2103,7 +2334,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
datalen = (odev->hard_header_len + 16) & ~0xf;
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen +
- pkt_dev->nr_labels*sizeof(u32), GFP_ATOMIC);
+ pkt_dev->nr_labels*sizeof(u32) +
+ VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
+ GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -2116,6 +2349,24 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
if (pkt_dev->nr_labels)
mpls_push(mpls, pkt_dev);
+
+ if (pkt_dev->vlan_id != 0xffff) {
+ if(pkt_dev->svlan_id != 0xffff) {
+ svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+ *svlan_tci = htons(pkt_dev->svlan_id);
+ *svlan_tci |= pkt_dev->svlan_p << 5;
+ *svlan_tci |= pkt_dev->svlan_cfi << 4;
+ svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+ *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ }
+ vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+ *vlan_tci = htons(pkt_dev->vlan_id);
+ *vlan_tci |= pkt_dev->vlan_p << 5;
+ *vlan_tci |= pkt_dev->vlan_cfi << 4;
+ vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+ *vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
+ }
+
iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
@@ -2124,7 +2375,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
- pkt_dev->nr_labels*sizeof(u32);
+ pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
if (datalen < sizeof(struct pktgen_hdr))
datalen = sizeof(struct pktgen_hdr);
@@ -2136,7 +2387,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->ihl = 5;
iph->version = 4;
iph->ttl = 32;
- iph->tos = 0;
+ iph->tos = pkt_dev->tos;
iph->protocol = IPPROTO_UDP; /* UDP */
iph->saddr = pkt_dev->cur_saddr;
iph->daddr = pkt_dev->cur_daddr;
@@ -2146,9 +2397,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->check = 0;
iph->check = ip_fast_csum((void *)iph, iph->ihl);
skb->protocol = protocol;
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32);
+ skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ skb->nh.iph = iph;
+ skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2216,7 +2470,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
pgh->tv_sec = htonl(timestamp.tv_sec);
pgh->tv_usec = htonl(timestamp.tv_usec);
}
- pkt_dev->seq_num++;
return skb;
}
@@ -2400,17 +2653,26 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct pktgen_hdr *pgh = NULL;
__be16 protocol = __constant_htons(ETH_P_IPV6);
__be32 *mpls;
+ __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
+ __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
+ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
+ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
if (pkt_dev->nr_labels)
protocol = __constant_htons(ETH_P_MPLS_UC);
+ if (pkt_dev->vlan_id != 0xffff)
+ protocol = __constant_htons(ETH_P_8021Q);
+
/* Update any of the values, used when we're incrementing various
* fields.
*/
mod_cur_headers(pkt_dev);
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
- pkt_dev->nr_labels*sizeof(u32), GFP_ATOMIC);
+ pkt_dev->nr_labels*sizeof(u32) +
+ VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
+ GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -2423,16 +2685,34 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
if (pkt_dev->nr_labels)
mpls_push(mpls, pkt_dev);
+
+ if (pkt_dev->vlan_id != 0xffff) {
+ if(pkt_dev->svlan_id != 0xffff) {
+ svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+ *svlan_tci = htons(pkt_dev->svlan_id);
+ *svlan_tci |= pkt_dev->svlan_p << 5;
+ *svlan_tci |= pkt_dev->svlan_cfi << 4;
+ svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+ *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ }
+ vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+ *vlan_tci = htons(pkt_dev->vlan_id);
+ *vlan_tci |= pkt_dev->vlan_p << 5;
+ *vlan_tci |= pkt_dev->vlan_cfi << 4;
+ vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+ *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
+ }
+
iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
memcpy(eth, pkt_dev->hh, 12);
- *(u16 *) & eth[12] = __constant_htons(ETH_P_IPV6);
+ *(u16 *) & eth[12] = protocol;
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 -
sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
- pkt_dev->nr_labels*sizeof(u32);
+ pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
if (datalen < sizeof(struct pktgen_hdr)) {
datalen = sizeof(struct pktgen_hdr);
@@ -2448,6 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
*(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */
+ if (pkt_dev->traffic_class) {
+ /* Version + traffic class + flow (0) */
+ *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20));
+ }
+
iph->hop_limit = 32;
iph->payload_len = htons(sizeof(struct udphdr) + datalen);
@@ -2456,10 +2741,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32);
+ skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ skb->nh.ipv6h = iph;
+ skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2527,7 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
pgh->tv_sec = htonl(timestamp.tv_sec);
pgh->tv_usec = htonl(timestamp.tv_usec);
}
- pkt_dev->seq_num++;
+ /* pkt_dev->seq_num++; FF: you really mean this? */
return skb;
}
@@ -3173,6 +3461,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->udp_dst_min = 9;
pkt_dev->udp_dst_max = 9;
+ pkt_dev->vlan_p = 0;
+ pkt_dev->vlan_cfi = 0;
+ pkt_dev->vlan_id = 0xffff;
+ pkt_dev->svlan_p = 0;
+ pkt_dev->svlan_cfi = 0;
+ pkt_dev->svlan_id = 0xffff;
+
strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
if (!pktgen_setup_dev(pkt_dev)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3fcfa9c59e1f..221e4038216b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -16,7 +16,6 @@
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -36,6 +35,7 @@
#include <linux/init.h>
#include <linux/security.h>
#include <linux/mutex.h>
+#include <linux/if_addr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -50,6 +50,7 @@
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
+#include <net/fib_rules.h>
#include <net/netlink.h>
#ifdef CONFIG_NET_WIRELESS_RTNETLINK
#include <linux/wireless.h>
@@ -57,6 +58,7 @@
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
void rtnl_lock(void)
{
@@ -94,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
return 0;
}
-struct sock *rtnl;
-
struct rtnetlink_link * rtnetlink_links[NPROTO];
static const int rtm_min[RTM_NR_FAMILIES] =
@@ -103,8 +103,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
[RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
[RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
[RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
- [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)),
- [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
+ [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
[RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -112,7 +111,6 @@ static const int rtm_min[RTM_NR_FAMILIES] =
[RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
- [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)),
};
static const int rta_max[RTM_NR_FAMILIES] =
@@ -120,13 +118,11 @@ static const int rta_max[RTM_NR_FAMILIES] =
[RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
[RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
[RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
- [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX,
- [RTM_FAM(RTM_NEWRULE)] = RTA_MAX,
+ [RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
[RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
[RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
[RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
[RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
- [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX,
};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -169,24 +165,52 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+ return nlmsg_unicast(rtnl, skb, pid);
+}
+
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+ struct nlmsghdr *nlh, gfp_t flags)
+{
+ int report = 0;
+
+ if (nlh)
+ report = nlmsg_report(nlh);
+
+ return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+ netlink_set_err(rtnl, 0, group, error);
+}
+
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
- struct rtattr *mx = (struct rtattr*)skb->tail;
- int i;
+ struct nlattr *mx;
+ int i, valid = 0;
+
+ mx = nla_nest_start(skb, RTA_METRICS);
+ if (mx == NULL)
+ return -ENOBUFS;
+
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (metrics[i]) {
+ valid++;
+ NLA_PUT_U32(skb, i+1, metrics[i]);
+ }
+ }
- RTA_PUT(skb, RTA_METRICS, 0, NULL);
- for (i=0; i<RTAX_MAX; i++) {
- if (metrics[i])
- RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+ if (!valid) {
+ nla_nest_cancel(skb, mx);
+ return 0;
}
- mx->rta_len = skb->tail - (u8*)mx;
- if (mx->rta_len == RTA_LENGTH(0))
- skb_trim(skb, (u8*)mx - skb->data);
- return 0;
-rtattr_failure:
- skb_trim(skb, (u8*)mx - skb->data);
- return -1;
+ return nla_nest_end(skb, mx);
+
+nla_put_failure:
+ return nla_nest_cancel(skb, mx);
}
@@ -217,41 +241,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
}
}
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
- int type, u32 pid, u32 seq, u32 change,
- unsigned int flags)
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+ struct net_device_stats *b)
{
- struct ifinfomsg *r;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
- r = NLMSG_DATA(nlh);
- r->ifi_family = AF_UNSPEC;
- r->__ifi_pad = 0;
- r->ifi_type = dev->type;
- r->ifi_index = dev->ifindex;
- r->ifi_flags = dev_get_flags(dev);
- r->ifi_change = change;
-
- RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
-
- if (1) {
- u32 txqlen = dev->tx_queue_len;
- RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
- }
+ a->rx_packets = b->rx_packets;
+ a->tx_packets = b->tx_packets;
+ a->rx_bytes = b->rx_bytes;
+ a->tx_bytes = b->tx_bytes;
+ a->rx_errors = b->rx_errors;
+ a->tx_errors = b->tx_errors;
+ a->rx_dropped = b->rx_dropped;
+ a->tx_dropped = b->tx_dropped;
+
+ a->multicast = b->multicast;
+ a->collisions = b->collisions;
+
+ a->rx_length_errors = b->rx_length_errors;
+ a->rx_over_errors = b->rx_over_errors;
+ a->rx_crc_errors = b->rx_crc_errors;
+ a->rx_frame_errors = b->rx_frame_errors;
+ a->rx_fifo_errors = b->rx_fifo_errors;
+ a->rx_missed_errors = b->rx_missed_errors;
+
+ a->tx_aborted_errors = b->tx_aborted_errors;
+ a->tx_carrier_errors = b->tx_carrier_errors;
+ a->tx_fifo_errors = b->tx_fifo_errors;
+ a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+ a->tx_window_errors = b->tx_window_errors;
+
+ a->rx_compressed = b->rx_compressed;
+ a->tx_compressed = b->tx_compressed;
+};
- if (1) {
- u32 weight = dev->weight;
- RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
- }
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+ void *iwbuf, int iwbuflen, int type, u32 pid,
+ u32 seq, u32 change, unsigned int flags)
+{
+ struct ifinfomsg *ifm;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ ifm = nlmsg_data(nlh);
+ ifm->ifi_family = AF_UNSPEC;
+ ifm->__ifi_pad = 0;
+ ifm->ifi_type = dev->type;
+ ifm->ifi_index = dev->ifindex;
+ ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_change = change;
+
+ NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+ NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+ NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+ NLA_PUT_U8(skb, IFLA_OPERSTATE,
+ netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+ NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+ NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+
+ if (dev->ifindex != dev->iflink)
+ NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+ if (dev->master)
+ NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
- if (1) {
- u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN;
- u8 link_mode = dev->link_mode;
- RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
- RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
- }
+ if (dev->qdisc_sleeping)
+ NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
if (1) {
struct rtnl_link_ifmap map = {
@@ -262,58 +318,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
.dma = dev->dma,
.port = dev->if_port,
};
- RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+ NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
}
if (dev->addr_len) {
- RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
- RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
- }
-
- if (1) {
- u32 mtu = dev->mtu;
- RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
- }
-
- if (dev->ifindex != dev->iflink) {
- u32 iflink = dev->iflink;
- RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
- }
-
- if (dev->qdisc_sleeping)
- RTA_PUT(skb, IFLA_QDISC,
- strlen(dev->qdisc_sleeping->ops->id) + 1,
- dev->qdisc_sleeping->ops->id);
-
- if (dev->master) {
- u32 master = dev->master->ifindex;
- RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+ NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
if (dev->get_stats) {
- unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+ struct net_device_stats *stats = dev->get_stats(dev);
if (stats) {
- struct rtattr *a;
- __u32 *s;
- int i;
- int n = sizeof(struct rtnl_link_stats)/4;
-
- a = __RTA_PUT(skb, IFLA_STATS, n*4);
- s = RTA_DATA(a);
- for (i=0; i<n; i++)
- s[i] = stats[i];
+ struct nlattr *attr;
+
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (attr == NULL)
+ goto nla_put_failure;
+
+ copy_rtnl_link_stats(nla_data(attr), stats);
}
}
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ if (iwbuf)
+ NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
-static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
@@ -323,10 +359,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
- if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI) <= 0)
+ if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
break;
}
read_unlock(&dev_base_lock);
@@ -335,52 +370,70 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
return skb->len;
}
-static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+ [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+ [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
+ [IFLA_MTU] = { .type = NLA_U32 },
+ [IFLA_TXQLEN] = { .type = NLA_U32 },
+ [IFLA_WEIGHT] = { .type = NLA_U32 },
+ [IFLA_OPERSTATE] = { .type = NLA_U8 },
+ [IFLA_LINKMODE] = { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct ifinfomsg *ifm = NLMSG_DATA(nlh);
- struct rtattr **ida = arg;
+ struct ifinfomsg *ifm;
struct net_device *dev;
- int err, send_addr_notify = 0;
+ int err, send_addr_notify = 0, modified = 0;
+ struct nlattr *tb[IFLA_MAX+1];
+ char ifname[IFNAMSIZ];
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ if (err < 0)
+ goto errout;
+
+ if (tb[IFLA_IFNAME])
+ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ else
+ ifname[0] = '\0';
+
+ err = -EINVAL;
+ ifm = nlmsg_data(nlh);
if (ifm->ifi_index >= 0)
dev = dev_get_by_index(ifm->ifi_index);
- else if (ida[IFLA_IFNAME - 1]) {
- char ifname[IFNAMSIZ];
-
- if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
- IFNAMSIZ) >= IFNAMSIZ)
- return -EINVAL;
+ else if (tb[IFLA_IFNAME])
dev = dev_get_by_name(ifname);
- } else
- return -EINVAL;
+ else
+ goto errout;
- if (!dev)
- return -ENODEV;
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+ }
- err = -EINVAL;
+ if (tb[IFLA_ADDRESS] &&
+ nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+ goto errout_dev;
- if (ifm->ifi_flags)
- dev_change_flags(dev, ifm->ifi_flags);
+ if (tb[IFLA_BROADCAST] &&
+ nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+ goto errout_dev;
- if (ida[IFLA_MAP - 1]) {
+ if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
if (!dev->set_config) {
err = -EOPNOTSUPP;
- goto out;
+ goto errout_dev;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
- goto out;
+ goto errout_dev;
}
-
- if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
- goto out;
-
- u_map = RTA_DATA(ida[IFLA_MAP - 1]);
+ u_map = nla_data(tb[IFLA_MAP]);
k_map.mem_start = (unsigned long) u_map->mem_start;
k_map.mem_end = (unsigned long) u_map->mem_end;
k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -389,187 +442,175 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
k_map.port = (unsigned char) u_map->port;
err = dev->set_config(dev, &k_map);
+ if (err < 0)
+ goto errout_dev;
- if (err)
- goto out;
+ modified = 1;
}
- if (ida[IFLA_ADDRESS - 1]) {
+ if (tb[IFLA_ADDRESS]) {
+ struct sockaddr *sa;
+ int len;
+
if (!dev->set_mac_address) {
err = -EOPNOTSUPP;
- goto out;
+ goto errout_dev;
}
+
if (!netif_device_present(dev)) {
err = -ENODEV;
- goto out;
+ goto errout_dev;
}
- if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
- goto out;
- err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+ len = sizeof(sa_family_t) + dev->addr_len;
+ sa = kmalloc(len, GFP_KERNEL);
+ if (!sa) {
+ err = -ENOMEM;
+ goto errout_dev;
+ }
+ sa->sa_family = dev->type;
+ memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+ dev->addr_len);
+ err = dev->set_mac_address(dev, sa);
+ kfree(sa);
if (err)
- goto out;
+ goto errout_dev;
send_addr_notify = 1;
+ modified = 1;
}
- if (ida[IFLA_BROADCAST - 1]) {
- if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
- goto out;
- memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
- dev->addr_len);
- send_addr_notify = 1;
+ if (tb[IFLA_MTU]) {
+ err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+ if (err < 0)
+ goto errout_dev;
+ modified = 1;
}
- if (ida[IFLA_MTU - 1]) {
- if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
- goto out;
- err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
-
- if (err)
- goto out;
-
+ /*
+ * Interface selected by interface index but interface
+ * name provided implies that a name change has been
+ * requested.
+ */
+ if (ifm->ifi_index >= 0 && ifname[0]) {
+ err = dev_change_name(dev, ifname);
+ if (err < 0)
+ goto errout_dev;
+ modified = 1;
}
- if (ida[IFLA_TXQLEN - 1]) {
- if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
- goto out;
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+ if (tb[IFLA_WIRELESS]) {
+ /* Call Wireless Extensions.
+ * Various stuff checked in there... */
+ err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
+ nla_len(tb[IFLA_WIRELESS]));
+ if (err < 0)
+ goto errout_dev;
+ }
+#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
- dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+ if (tb[IFLA_BROADCAST]) {
+ nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+ send_addr_notify = 1;
}
- if (ida[IFLA_WEIGHT - 1]) {
- if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
- goto out;
- dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
- }
+ if (ifm->ifi_flags)
+ dev_change_flags(dev, ifm->ifi_flags);
- if (ida[IFLA_OPERSTATE - 1]) {
- if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
- goto out;
+ if (tb[IFLA_TXQLEN])
+ dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
- }
+ if (tb[IFLA_WEIGHT])
+ dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
- if (ida[IFLA_LINKMODE - 1]) {
- if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
- goto out;
+ if (tb[IFLA_OPERSTATE])
+ set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+ if (tb[IFLA_LINKMODE]) {
write_lock_bh(&dev_base_lock);
- dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
+ dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
write_unlock_bh(&dev_base_lock);
}
- if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
- char ifname[IFNAMSIZ];
-
- if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
- IFNAMSIZ) >= IFNAMSIZ)
- goto out;
- err = dev_change_name(dev, ifname);
- if (err)
- goto out;
- }
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (ida[IFLA_WIRELESS - 1]) {
-
- /* Call Wireless Extensions.
- * Various stuff checked in there... */
- err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len);
- if (err)
- goto out;
- }
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
err = 0;
-out:
+errout_dev:
+ if (err < 0 && modified && net_ratelimit())
+ printk(KERN_WARNING "A link change request failed with "
+ "some changes comitted already. Interface %s may "
+ "have been left with an inconsistent configuration, "
+ "please check.\n", dev->name);
+
if (send_addr_notify)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
dev_put(dev);
+errout:
return err;
}
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct ifinfomsg *ifm = NLMSG_DATA(in_nlh);
- struct rtattr **ida = arg;
- struct net_device *dev;
- struct ifinfomsg *r;
- struct nlmsghdr *nlh;
- int err = -ENOBUFS;
- struct sk_buff *skb;
- unsigned char *b;
- char *iw_buf = NULL;
+ struct ifinfomsg *ifm;
+ struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *dev = NULL;
+ struct sk_buff *nskb;
+ char *iw_buf = NULL, *iw = NULL;
int iw_buf_len = 0;
+ int err, payload;
- if (ifm->ifi_index >= 0)
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ if (err < 0)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_index >= 0) {
dev = dev_get_by_index(ifm->ifi_index);
- else
+ if (dev == NULL)
+ return -ENODEV;
+ } else
return -EINVAL;
- if (!dev)
- return -ENODEV;
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (ida[IFLA_WIRELESS - 1]) {
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+ if (tb[IFLA_WIRELESS]) {
/* Call Wireless Extensions. We need to know the size before
* we can alloc. Various stuff checked in there... */
- err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len);
- if (err)
- goto out;
+ err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
+ nla_len(tb[IFLA_WIRELESS]),
+ &iw_buf, &iw_buf_len);
+ if (err < 0)
+ goto errout;
+
+ iw += IW_EV_POINT_OFF;
}
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
- /* Create a skb big enough to include all the data.
- * Some requests are way bigger than 4k... Jean II */
- skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)),
- GFP_KERNEL);
- if (!skb)
- goto out;
- b = skb->tail;
-
- /* Put in the message the usual good stuff */
- nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq,
- RTM_NEWLINK, sizeof(*r));
- r = NLMSG_DATA(nlh);
- r->ifi_family = AF_UNSPEC;
- r->__ifi_pad = 0;
- r->ifi_type = dev->type;
- r->ifi_index = dev->ifindex;
- r->ifi_flags = dev->flags;
- r->ifi_change = 0;
-
- /* Put the wireless payload if it exist */
- if(iw_buf != NULL)
- RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len,
- iw_buf + IW_EV_POINT_OFF);
-
- nlh->nlmsg_len = skb->tail - b;
-
- /* Needed ? */
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
- err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
- if (err > 0)
- err = 0;
-out:
- if(iw_buf != NULL)
- kfree(iw_buf);
+ payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
+ nla_total_size(iw_buf_len));
+ nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+ if (nskb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+ NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+ if (err <= 0) {
+ kfree_skb(nskb);
+ goto errout;
+ }
+
+ err = rtnl_unicast(skb, NETLINK_CB(skb).pid);
+errout:
+ kfree(iw_buf);
dev_put(dev);
- return err;
-rtattr_failure:
-nlmsg_failure:
- kfree_skb(skb);
- goto out;
+ return err;
}
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
@@ -596,20 +637,22 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
- sizeof(struct rtnl_link_ifmap) +
- sizeof(struct rtnl_link_stats) + 128);
+ int err = -ENOBUFS;
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb)
- return;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
- if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+ err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+ if (err < 0) {
kfree_skb(skb);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_LINK, err);
}
/* Protected by RTNL sempahore. */
@@ -663,7 +706,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && security_netlink_recv(skb)) {
+ if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
*errp = -EPERM;
return -1;
}
@@ -734,18 +777,19 @@ static void rtnetlink_rcv(struct sock *sk, int len)
static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
{
- [RTM_GETLINK - RTM_BASE] = {
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- .doit = do_getlink,
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
- .dumpit = rtnetlink_dump_ifinfo },
- [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
- [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
+ [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
+ .dumpit = rtnl_dump_ifinfo },
+ [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
+ [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
+ [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
[RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
[RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
- [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
+#ifdef CONFIG_FIB_RULES
+ [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
+ [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
+#endif
+ [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
[RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
};
@@ -805,7 +849,9 @@ EXPORT_SYMBOL(rtattr_strlcpy);
EXPORT_SYMBOL(rtattr_parse);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_trylock);
EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e5044ba3ab6..c448c7f6fde2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -38,7 +38,6 @@
* The functions in this file will not compile correctly with gcc 2.4.x
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -250,12 +249,37 @@ nodata:
goto out;
}
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+ unsigned int length, gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
-static void skb_drop_fraglist(struct sk_buff *skb)
+ skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = dev;
+ }
+ return skb;
+}
+
+static void skb_drop_list(struct sk_buff **listp)
{
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ struct sk_buff *list = *listp;
- skb_shinfo(skb)->frag_list = NULL;
+ *listp = NULL;
do {
struct sk_buff *this = list;
@@ -264,6 +288,11 @@ static void skb_drop_fraglist(struct sk_buff *skb)
} while (list);
}
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+ skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
@@ -272,7 +301,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-void skb_release_data(struct sk_buff *skb)
+static void skb_release_data(struct sk_buff *skb)
{
if (!skb->cloned ||
!atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
@@ -824,41 +853,81 @@ free_skb:
int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
+ struct sk_buff **fragp;
+ struct sk_buff *frag;
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
+ int err;
- for (i = 0; i < nfrags; i++) {
+ if (skb_cloned(skb) &&
+ unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+ return err;
+
+ i = 0;
+ if (offset >= len)
+ goto drop_pages;
+
+ for (; i < nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
- if (end > len) {
- if (skb_cloned(skb)) {
- if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
- if (len <= offset) {
- put_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->nr_frags--;
- } else {
- skb_shinfo(skb)->frags[i].size = len - offset;
- }
+
+ if (end < len) {
+ offset = end;
+ continue;
}
- offset = end;
+
+ skb_shinfo(skb)->frags[i++].size = len - offset;
+
+drop_pages:
+ skb_shinfo(skb)->nr_frags = i;
+
+ for (; i < nfrags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+
+ if (skb_shinfo(skb)->frag_list)
+ skb_drop_fraglist(skb);
+ goto done;
+ }
+
+ for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+ fragp = &frag->next) {
+ int end = offset + frag->len;
+
+ if (skb_shared(frag)) {
+ struct sk_buff *nfrag;
+
+ nfrag = skb_clone(frag, GFP_ATOMIC);
+ if (unlikely(!nfrag))
+ return -ENOMEM;
+
+ nfrag->next = frag->next;
+ kfree_skb(frag);
+ frag = nfrag;
+ *fragp = frag;
+ }
+
+ if (end < len) {
+ offset = end;
+ continue;
+ }
+
+ if (end > len &&
+ unlikely((err = pskb_trim(frag, len - offset))))
+ return err;
+
+ if (frag->next)
+ skb_drop_list(&frag->next);
+ break;
}
- if (offset < len) {
+done:
+ if (len > skb_headlen(skb)) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
- if (len <= skb_headlen(skb)) {
- skb->len = len;
- skb->data_len = 0;
- skb->tail = skb->data + len;
- if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
- skb_drop_fraglist(skb);
- } else {
- skb->data_len -= skb->len - len;
- skb->len = len;
- }
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
}
return 0;
@@ -1328,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
unsigned int csum;
long csstart;
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
csstart = skb->h.raw - skb->data;
else
csstart = skb_headlen(skb);
@@ -1342,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
skb->len - csstart, 0);
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
long csstuff = csstart + skb->csum;
*((unsigned short *)(to + csstuff)) = csum_fold(csum);
@@ -1739,12 +1808,15 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
unsigned int to, struct ts_config *config,
struct ts_state *state)
{
+ unsigned int ret;
+
config->get_next_block = skb_ts_get_next_block;
config->finish = skb_ts_finish;
skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
- return textsearch_find(config, state);
+ ret = textsearch_find(config, state);
+ return (ret <= to - from ? ret : UINT_MAX);
}
/**
@@ -1826,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
* @len: length of data pulled
*
* This function performs an skb_pull on the packet and updates
- * update the CHECKSUM_HW checksum. It should be used on receive
- * path processing instead of skb_pull unless you know that the
- * checksum difference is zero (e.g., a valid IP header) or you
- * are setting ip_summed to CHECKSUM_NONE.
+ * update the CHECKSUM_COMPLETE checksum. It should be used on
+ * receive path processing instead of skb_pull unless you know
+ * that the checksum difference is zero (e.g., a valid IP header)
+ * or you are setting ip_summed to CHECKSUM_NONE.
*/
unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
@@ -1845,13 +1917,13 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
* skb_segment - Perform protocol segmentation on skb.
* @skb: buffer to segment
- * @sg: whether scatter-gather can be used for generated segments
+ * @features: features for the output path (see dev->features)
*
* This function performs segmentation on the given skb. It returns
* the segment at the given position. It returns NULL if there are
* no more segments to generate, or when an error is encountered.
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
+struct sk_buff *skb_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
@@ -1860,6 +1932,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
+ int sg = features & NETIF_F_SG;
int nfrags = skb_shinfo(skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
@@ -1921,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
frag = skb_shinfo(nskb)->frags;
k = 0;
- nskb->ip_summed = CHECKSUM_HW;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum = skb->csum;
memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
@@ -1973,19 +2046,14 @@ void __init skb_init(void)
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
sizeof(struct sk_buff),
0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!skbuff_head_cache)
- panic("cannot create skbuff cache");
-
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
(2*sizeof(struct sk_buff)) +
sizeof(atomic_t),
0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!skbuff_fclone_cache)
- panic("cannot create skbuff cache");
}
EXPORT_SYMBOL(___pskb_trim);
@@ -1993,6 +2061,7 @@ EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
+EXPORT_SYMBOL(__netdev_alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5d820c376653..b77e155cbe6c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -92,7 +92,6 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -130,6 +129,53 @@
#include <net/tcp.h>
#endif
+/*
+ * Each address family might have different locking rules, so we have
+ * one slock key per address family:
+ */
+static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_slock_keys[AF_MAX];
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * Make lock validator output more readable. (we pre-construct these
+ * strings build-time, so that runtime initialization of socket
+ * locks is fast):
+ */
+static const char *af_family_key_strings[AF_MAX+1] = {
+ "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
+ "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
+ "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
+ "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
+ "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
+ "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
+ "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
+ "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
+ "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
+ "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
+ "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+};
+static const char *af_family_slock_key_strings[AF_MAX+1] = {
+ "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
+ "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
+ "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
+ "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
+ "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
+ "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
+ "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
+ "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
+ "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
+ "slock-27" , "slock-28" , "slock-29" ,
+ "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+};
+#endif
+
+/*
+ * sk_callback_lock locking rules are per-address-family,
+ * so split the lock classes by using a per-AF key:
+ */
+static struct lock_class_key af_callback_keys[AF_MAX];
+
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
* platforms. This makes socket queueing behavior and performance
@@ -141,13 +187,13 @@
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
/* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
@@ -201,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto out;
}
- /* It would be deadlock, if sock_queue_rcv_skb is used
- with socket lock! We assume that users of this
- function are lock free.
- */
- err = sk_filter(sk, skb, 1);
+ err = sk_filter(sk, skb);
if (err)
goto out;
@@ -232,15 +274,22 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
{
int rc = NET_RX_SUCCESS;
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk)) {
+ /*
+ * trylock + unlock semantics:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+
rc = sk->sk_backlog_rcv(sk, skb);
- else
+
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ } else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
out:
@@ -553,18 +602,25 @@ set_rcvbuf:
break;
case SO_DETACH_FILTER:
- spin_lock_bh(&sk->sk_lock.slock);
- filter = sk->sk_filter;
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
- sk->sk_filter = NULL;
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_release(sk, filter);
+ rcu_read_unlock_bh();
break;
}
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_read_unlock_bh();
ret = -ENONET;
break;
+ case SO_PASSSEC:
+ if (valbool)
+ set_bit(SOCK_PASSSEC, &sock->flags);
+ else
+ clear_bit(SOCK_PASSSEC, &sock->flags);
+ break;
+
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
@@ -723,6 +779,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_state == TCP_LISTEN;
break;
+ case SO_PASSSEC:
+ v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+ break;
+
case SO_PEERSEC:
return security_socket_getpeersec_stream(sock, optval, optlen, len);
@@ -739,6 +799,33 @@ lenout:
return 0;
}
+/*
+ * Initialize an sk_lock.
+ *
+ * (We also register the sk_lock with the lock validator.)
+ */
+static void inline sock_lock_init(struct sock *sk)
+{
+ spin_lock_init(&sk->sk_lock.slock);
+ sk->sk_lock.owner = NULL;
+ init_waitqueue_head(&sk->sk_lock.wq);
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
+
+ /*
+ * Mark both the sk_lock and the sk_lock.slock as a
+ * per-address-family lock class:
+ */
+ lockdep_set_class_and_name(&sk->sk_lock.slock,
+ af_family_slock_keys + sk->sk_family,
+ af_family_slock_key_strings[sk->sk_family]);
+ lockdep_init_map(&sk->sk_lock.dep_map,
+ af_family_key_strings[sk->sk_family],
+ af_family_keys + sk->sk_family);
+}
+
/**
* sk_alloc - All socket objects are allocated here
* @family: protocol family
@@ -793,10 +880,10 @@ void sk_free(struct sock *sk)
if (sk->sk_destruct)
sk->sk_destruct(sk);
- filter = sk->sk_filter;
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
sk_filter_release(sk, filter);
- sk->sk_filter = NULL;
+ rcu_assign_pointer(sk->sk_filter, NULL);
}
sock_disable_timestamp(sk);
@@ -820,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
if (newsk != NULL) {
struct sk_filter *filter;
- memcpy(newsk, sk, sk->sk_prot->obj_size);
+ sock_copy(newsk, sk);
/* SANITY */
sk_node_init(&newsk->sk_node);
@@ -838,6 +925,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
rwlock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
+ lockdep_set_class(&newsk->sk_callback_lock,
+ af_callback_keys + newsk->sk_family);
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
@@ -1412,6 +1501,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
rwlock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family);
sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
@@ -1439,24 +1530,34 @@ void sock_init_data(struct socket *sock, struct sock *sk)
void fastcall lock_sock(struct sock *sk)
{
might_sleep();
- spin_lock_bh(&(sk->sk_lock.slock));
+ spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owner)
__lock_sock(sk);
sk->sk_lock.owner = (void *)1;
- spin_unlock_bh(&(sk->sk_lock.slock));
+ spin_unlock(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ local_bh_enable();
}
EXPORT_SYMBOL(lock_sock);
void fastcall release_sock(struct sock *sk)
{
- spin_lock_bh(&(sk->sk_lock.slock));
+ /*
+ * The sk_lock has mutex_unlock() semantics:
+ */
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+
+ spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
sk->sk_lock.owner = NULL;
- if (waitqueue_active(&(sk->sk_lock.wq)))
- wake_up(&(sk->sk_lock.wq));
- spin_unlock_bh(&(sk->sk_lock.slock));
+ if (waitqueue_active(&sk->sk_lock.wq))
+ wake_up(&sk->sk_lock.wq);
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
diff --git a/net/core/stream.c b/net/core/stream.c
index e9489696f694..d1d7decf70b0 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -196,15 +196,13 @@ EXPORT_SYMBOL(sk_stream_error);
void __sk_stream_mem_reclaim(struct sock *sk)
{
- if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) {
- atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
- sk->sk_prot->memory_allocated);
- sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
- if (*sk->sk_prot->memory_pressure &&
- (atomic_read(sk->sk_prot->memory_allocated) <
- sk->sk_prot->sysctl_mem[0]))
- *sk->sk_prot->memory_pressure = 0;
- }
+ atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+ sk->sk_prot->memory_allocated);
+ sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
+ if (*sk->sk_prot->memory_pressure &&
+ (atomic_read(sk->sk_prot->memory_allocated) <
+ sk->sk_prot->sysctl_mem[0]))
+ *sk->sk_prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_stream_mem_reclaim);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 710453656721..02534131d88e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -7,7 +7,6 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <net/sock.h>
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index b7c98dbcdb81..248a6b666aff 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -29,6 +29,7 @@
#include <linux/socket.h>
#include <linux/rtnetlink.h> /* for BUG_TRAP */
#include <net/tcp.h>
+#include <net/netdma.h>
#define NET_DMA_DEFAULT_COPYBREAK 4096
diff --git a/net/core/utils.c b/net/core/utils.c
index 4f96f389243d..94c5d761c830 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -3,7 +3,8 @@
*
* Authors:
* net_random Alan Cox
- * net_ratelimit Andy Kleen
+ * net_ratelimit Andi Kleen
+ * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
*
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
@@ -130,12 +131,13 @@ void __init net_random_init(void)
static int net_random_reseed(void)
{
int i;
- unsigned long seed[NR_CPUS];
+ unsigned long seed;
- get_random_bytes(seed, sizeof(seed));
for_each_possible_cpu(i) {
struct nrnd_state *state = &per_cpu(net_rand_state,i);
- __net_srandom(state, seed[i]);
+
+ get_random_bytes(&seed, sizeof(seed));
+ __net_srandom(state, seed);
}
return 0;
}
@@ -190,3 +192,215 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+
+#define IN6PTON_XDIGIT 0x00010000
+#define IN6PTON_DIGIT 0x00020000
+#define IN6PTON_COLON_MASK 0x00700000
+#define IN6PTON_COLON_1 0x00100000 /* single : requested */
+#define IN6PTON_COLON_2 0x00200000 /* second : requested */
+#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */
+#define IN6PTON_DOT 0x00800000 /* . */
+#define IN6PTON_DELIM 0x10000000
+#define IN6PTON_NULL 0x20000000 /* first/tail */
+#define IN6PTON_UNKNOWN 0x40000000
+
+static inline int digit2bin(char c, char delim)
+{
+ if (c == delim || c == '\0')
+ return IN6PTON_DELIM;
+ if (c == '.')
+ return IN6PTON_DOT;
+ if (c >= '0' && c <= '9')
+ return (IN6PTON_DIGIT | (c - '0'));
+ return IN6PTON_UNKNOWN;
+}
+
+static inline int xdigit2bin(char c, char delim)
+{
+ if (c == delim || c == '\0')
+ return IN6PTON_DELIM;
+ if (c == ':')
+ return IN6PTON_COLON_MASK;
+ if (c == '.')
+ return IN6PTON_DOT;
+ if (c >= '0' && c <= '9')
+ return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
+ if (c >= 'a' && c <= 'f')
+ return (IN6PTON_XDIGIT | (c - 'a' + 10));
+ if (c >= 'A' && c <= 'F')
+ return (IN6PTON_XDIGIT | (c - 'A' + 10));
+ return IN6PTON_UNKNOWN;
+}
+
+int in4_pton(const char *src, int srclen,
+ u8 *dst,
+ char delim, const char **end)
+{
+ const char *s;
+ u8 *d;
+ u8 dbuf[4];
+ int ret = 0;
+ int i;
+ int w = 0;
+
+ if (srclen < 0)
+ srclen = strlen(src);
+ s = src;
+ d = dbuf;
+ i = 0;
+ while(1) {
+ int c;
+ c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+ if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) {
+ goto out;
+ }
+ if (c & (IN6PTON_DOT | IN6PTON_DELIM)) {
+ if (w == 0)
+ goto out;
+ *d++ = w & 0xff;
+ w = 0;
+ i++;
+ if (c & IN6PTON_DELIM) {
+ if (i != 4)
+ goto out;
+ break;
+ }
+ goto cont;
+ }
+ w = (w * 10) + c;
+ if ((w & 0xffff) > 255) {
+ goto out;
+ }
+cont:
+ if (i >= 4)
+ goto out;
+ s++;
+ srclen--;
+ }
+ ret = 1;
+ memcpy(dst, dbuf, sizeof(dbuf));
+out:
+ if (end)
+ *end = s;
+ return ret;
+}
+
+EXPORT_SYMBOL(in4_pton);
+
+int in6_pton(const char *src, int srclen,
+ u8 *dst,
+ char delim, const char **end)
+{
+ const char *s, *tok = NULL;
+ u8 *d, *dc = NULL;
+ u8 dbuf[16];
+ int ret = 0;
+ int i;
+ int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
+ int w = 0;
+
+ memset(dbuf, 0, sizeof(dbuf));
+
+ s = src;
+ d = dbuf;
+ if (srclen < 0)
+ srclen = strlen(src);
+
+ while (1) {
+ int c;
+
+ c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+ if (!(c & state))
+ goto out;
+ if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+ /* process one 16-bit word */
+ if (!(state & IN6PTON_NULL)) {
+ *d++ = (w >> 8) & 0xff;
+ *d++ = w & 0xff;
+ }
+ w = 0;
+ if (c & IN6PTON_DELIM) {
+ /* We've processed last word */
+ break;
+ }
+ /*
+ * COLON_1 => XDIGIT
+ * COLON_2 => XDIGIT|DELIM
+ * COLON_1_2 => COLON_2
+ */
+ switch (state & IN6PTON_COLON_MASK) {
+ case IN6PTON_COLON_2:
+ dc = d;
+ state = IN6PTON_XDIGIT | IN6PTON_DELIM;
+ if (dc - dbuf >= sizeof(dbuf))
+ state |= IN6PTON_NULL;
+ break;
+ case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
+ state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
+ break;
+ case IN6PTON_COLON_1:
+ state = IN6PTON_XDIGIT;
+ break;
+ case IN6PTON_COLON_1_2:
+ state = IN6PTON_COLON_2;
+ break;
+ default:
+ state = 0;
+ }
+ tok = s + 1;
+ goto cont;
+ }
+
+ if (c & IN6PTON_DOT) {
+ ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
+ if (ret > 0) {
+ d += 4;
+ break;
+ }
+ goto out;
+ }
+
+ w = (w << 4) | (0xff & c);
+ state = IN6PTON_COLON_1 | IN6PTON_DELIM;
+ if (!(w & 0xf000)) {
+ state |= IN6PTON_XDIGIT;
+ }
+ if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
+ state |= IN6PTON_COLON_1_2;
+ state &= ~IN6PTON_DELIM;
+ }
+ if (d + 2 >= dbuf + sizeof(dbuf)) {
+ state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
+ }
+cont:
+ if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
+ d + 4 == dbuf + sizeof(dbuf)) {
+ state |= IN6PTON_DOT;
+ }
+ if (d >= dbuf + sizeof(dbuf)) {
+ state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
+ }
+ s++;
+ srclen--;
+ }
+
+ i = 15; d--;
+
+ if (dc) {
+ while(d >= dc)
+ dst[i--] = *d--;
+ while(i >= dc - dbuf)
+ dst[i--] = 0;
+ while(i >= 0)
+ dst[i--] = *d--;
+ } else
+ memcpy(dst, dbuf, sizeof(dbuf));
+
+ ret = 1;
+out:
+ if (end)
+ *end = s;
+ return ret;
+}
+
+EXPORT_SYMBOL(in6_pton);
diff --git a/net/core/wireless.c b/net/core/wireless.c
index d2bc72d318f7..ffff0da46c6e 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -68,11 +68,18 @@
*
* v8 - 17.02.06 - Jean II
* o RtNetlink requests support (SET/GET)
+ *
+ * v8b - 03.08.06 - Herbert Xu
+ * o Fix Wireless Event locking issues.
+ *
+ * v9 - 14.3.06 - Jean II
+ * o Change length in ESSID and NICK to strlen() instead of strlen()+1
+ * o Make standard_ioctl_num and standard_event_num unsigned
+ * o Remove (struct net_device *)->get_wireless_stats()
*/
/***************************** INCLUDES *****************************/
-#include <linux/config.h> /* Not needed ??? */
#include <linux/module.h>
#include <linux/types.h> /* off_t */
#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */
@@ -82,9 +89,11 @@
#include <linux/init.h> /* for __init */
#include <linux/if_arp.h> /* ARPHRD_ETHER */
#include <linux/etherdevice.h> /* compare_ether_addr */
+#include <linux/interrupt.h>
#include <linux/wireless.h> /* Pretty obvious */
#include <net/iw_handler.h> /* New driver API */
+#include <net/netlink.h>
#include <asm/uaccess.h> /* copy_to_user() */
@@ -233,24 +242,24 @@ static const struct iw_ioctl_description standard_ioctl[] = {
[SIOCSIWESSID - SIOCIWFIRST] = {
.header_type = IW_HEADER_TYPE_POINT,
.token_size = 1,
- .max_tokens = IW_ESSID_MAX_SIZE + 1,
+ .max_tokens = IW_ESSID_MAX_SIZE,
.flags = IW_DESCR_FLAG_EVENT,
},
[SIOCGIWESSID - SIOCIWFIRST] = {
.header_type = IW_HEADER_TYPE_POINT,
.token_size = 1,
- .max_tokens = IW_ESSID_MAX_SIZE + 1,
+ .max_tokens = IW_ESSID_MAX_SIZE,
.flags = IW_DESCR_FLAG_DUMP,
},
[SIOCSIWNICKN - SIOCIWFIRST] = {
.header_type = IW_HEADER_TYPE_POINT,
.token_size = 1,
- .max_tokens = IW_ESSID_MAX_SIZE + 1,
+ .max_tokens = IW_ESSID_MAX_SIZE,
},
[SIOCGIWNICKN - SIOCIWFIRST] = {
.header_type = IW_HEADER_TYPE_POINT,
.token_size = 1,
- .max_tokens = IW_ESSID_MAX_SIZE + 1,
+ .max_tokens = IW_ESSID_MAX_SIZE,
},
[SIOCSIWRATE - SIOCIWFIRST] = {
.header_type = IW_HEADER_TYPE_PARAM,
@@ -337,8 +346,8 @@ static const struct iw_ioctl_description standard_ioctl[] = {
.max_tokens = sizeof(struct iw_pmksa),
},
};
-static const int standard_ioctl_num = (sizeof(standard_ioctl) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
+ sizeof(struct iw_ioctl_description));
/*
* Meta-data about all the additional standard Wireless Extension events
@@ -388,8 +397,8 @@ static const struct iw_ioctl_description standard_event[] = {
.max_tokens = sizeof(struct iw_pmkid_cand),
},
};
-static const int standard_event_num = (sizeof(standard_event) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = (sizeof(standard_event) /
+ sizeof(struct iw_ioctl_description));
/* Size (in bytes) of the various private data types */
static const char iw_priv_type_size[] = {
@@ -464,17 +473,6 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
(dev->wireless_handlers->get_wireless_stats != NULL))
return dev->wireless_handlers->get_wireless_stats(dev);
- /* Old location, field to be removed in next WE */
- if(dev->get_wireless_stats) {
- static int printed_message;
-
- if (!printed_message++)
- printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n",
- dev->name);
-
- return dev->get_wireless_stats(dev);
- }
-
/* Not found */
return (struct iw_statistics *) NULL;
}
@@ -1844,6 +1842,43 @@ int wireless_rtnetlink_set(struct net_device * dev,
#ifdef WE_EVENT_RTNETLINK
/* ---------------------------------------------------------------- */
/*
+ * Locking...
+ * ----------
+ *
+ * Thanks to Herbert Xu <herbert@gondor.apana.org.au> for fixing
+ * the locking issue in here and implementing this code !
+ *
+ * The issue : wireless_send_event() is often called in interrupt context,
+ * while the Netlink layer can never be called in interrupt context.
+ * The fully formed RtNetlink events are queued, and then a tasklet is run
+ * to feed those to Netlink.
+ * The skb_queue is interrupt safe, and its lock is not held while calling
+ * Netlink, so there is no possibility of dealock.
+ * Jean II
+ */
+
+static struct sk_buff_head wireless_nlevent_queue;
+
+static int __init wireless_nlevent_init(void)
+{
+ skb_queue_head_init(&wireless_nlevent_queue);
+ return 0;
+}
+
+subsys_initcall(wireless_nlevent_init);
+
+static void wireless_nlevent_process(unsigned long data)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&wireless_nlevent_queue)))
+ rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+}
+
+static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
+
+/* ---------------------------------------------------------------- */
+/*
* Fill a rtnetlink message with our event data.
* Note that we propage only the specified event and don't dump the
* current wireless config. Dumping the wireless config is far too
@@ -1904,8 +1939,10 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
return;
}
NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+ skb_queue_tail(&wireless_nlevent_queue, skb);
+ tasklet_schedule(&wireless_nlevent_tasklet);
}
+
#endif /* WE_EVENT_RTNETLINK */
/* ---------------------------------------------------------------- */
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 859e3359fcda..e2a095d0fd80 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -40,6 +40,22 @@ config IP_DCCP_DEBUG
Just say N.
+config NET_DCCPPROBE
+ tristate "DCCP connection probing"
+ depends on PROC_FS && KPROBES
+ ---help---
+ This module allows for capturing the changes to DCCP connection
+ state in response to incoming packets. It is used for debugging
+ DCCP congestion avoidance modules. If you don't understand
+ what was just said, you don't need it: say N.
+
+ Documentation on how to use the packet generator can be found
+ at http://linux-net.osdl.org/index.php/DccpProbe
+
+ To compile this code as a module, choose M here: the
+ module will be called dccp_probe.
+
+
endmenu
endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 7696e219b05d..17ed99c46617 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
dccp-$(CONFIG_SYSCTL) += sysctl.o
dccp_diag-y := diag.o
+dccp_probe-y := probe.o
obj-y += ccids/
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 8c211c58893b..4d176d33983f 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
if (av != NULL) {
- av->dccpav_buf_head =
- av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
+ av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
av->dccpav_ack_ptr = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
- av->dccpav_sent_len = av->dccpav_vec_len = 0;
+ av->dccpav_vec_len = 0;
INIT_LIST_HEAD(&av->dccpav_records);
}
@@ -353,11 +352,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
{
struct dccp_ackvec_record *next;
- av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1;
- if (av->dccpav_buf_tail == 0)
- av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
-
- av->dccpav_vec_len -= avr->dccpavr_sent_len;
+ /* sort out vector length */
+ if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
+ av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
+ else
+ av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
+ - av->dccpav_buf_head
+ + avr->dccpavr_ack_ptr;
/* free records */
list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
@@ -434,8 +435,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
break;
found:
if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
- const u8 state = (*vector &
- DCCP_ACKVEC_STATE_MASK) >> 6;
+ const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
#ifdef CONFIG_IP_DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index ec7a89bb7b39..2424effac7f6 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -11,7 +11,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/time.h>
@@ -55,9 +54,7 @@ struct dccp_ackvec {
struct list_head dccpav_records;
struct timeval dccpav_time;
u8 dccpav_buf_head;
- u8 dccpav_buf_tail;
u8 dccpav_ack_ptr;
- u8 dccpav_sent_len;
u8 dccpav_vec_len;
u8 dccpav_buf_nonce;
u8 dccpav_ack_nonce;
@@ -108,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
- return av->dccpav_sent_len != av->dccpav_vec_len;
+ return av->dccpav_vec_len;
}
#else /* CONFIG_IP_DCCP_ACKVEC */
static inline int dccp_ackvec_init(void)
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index ca00191628f7..32752f750447 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -30,6 +30,14 @@ config IP_DCCP_CCID2
If in doubt, say M.
+config IP_DCCP_CCID2_DEBUG
+ bool "CCID2 debug"
+ depends on IP_DCCP_CCID2
+ ---help---
+ Enable CCID2 debug messages.
+
+ If in doubt, say N.
+
config IP_DCCP_CCID3
tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
depends on IP_DCCP
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d4f9e2d33453..2efb505aeb35 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -27,18 +27,15 @@
*
* BUGS:
* - sequence number wrapping
- * - jiffies wrapping
*/
-#include <linux/config.h>
#include "../ccid.h"
#include "../dccp.h"
#include "ccid2.h"
static int ccid2_debug;
-#undef CCID2_DEBUG
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
#define ccid2_pr_debug(format, a...) \
do { if (ccid2_debug) \
printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
@@ -47,9 +44,7 @@ static int ccid2_debug;
#define ccid2_pr_debug(format, a...)
#endif
-static const int ccid2_seq_len = 128;
-
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
{
int len = 0;
@@ -72,8 +67,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
/* packets are sent sequentially */
BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
- BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent);
- BUG_ON(len > ccid2_seq_len);
+ BUG_ON(time_before(seqp->ccid2s_sent,
+ prev->ccid2s_sent));
seqp = prev;
}
@@ -85,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
do {
seqp = seqp->ccid2s_prev;
len++;
- BUG_ON(len > ccid2_seq_len);
} while (seqp != hctx->ccid2hctx_seqh);
- BUG_ON(len != ccid2_seq_len);
ccid2_pr_debug("total len=%d\n", len);
+ BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
}
#else
#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
#endif
+static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
+ gfp_t gfp)
+{
+ struct ccid2_seq *seqp;
+ int i;
+
+ /* check if we have space to preserve the pointer to the buffer */
+ if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
+ sizeof(struct ccid2_seq*)))
+ return -ENOMEM;
+
+ /* allocate buffer and initialize linked list */
+ seqp = kmalloc(sizeof(*seqp) * num, gfp);
+ if (seqp == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < (num - 1); i++) {
+ seqp[i].ccid2s_next = &seqp[i + 1];
+ seqp[i + 1].ccid2s_prev = &seqp[i];
+ }
+ seqp[num - 1].ccid2s_next = seqp;
+ seqp->ccid2s_prev = &seqp[num - 1];
+
+ /* This is the first allocation. Initiate the head and tail. */
+ if (hctx->ccid2hctx_seqbufc == 0)
+ hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
+ else {
+ /* link the existing list with the one we just created */
+ hctx->ccid2hctx_seqh->ccid2s_next = seqp;
+ seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
+
+ hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1];
+ seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt;
+ }
+
+ /* store the original pointer to the buffer so we can free it */
+ hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
+ hctx->ccid2hctx_seqbufc++;
+
+ return 0;
+}
+
static int ccid2_hc_tx_send_packet(struct sock *sk,
struct sk_buff *skb, int len)
{
@@ -123,7 +159,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk,
}
}
- return 100; /* XXX */
+ return 1; /* XXX CCID should dequeue when ready instead of polling */
}
static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
@@ -151,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
dp->dccps_l_ack_ratio = val;
}
-static void ccid2_change_cwnd(struct sock *sk, int val)
+static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
{
- struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
if (val == 0)
val = 1;
@@ -165,6 +199,17 @@ static void ccid2_change_cwnd(struct sock *sk, int val)
hctx->ccid2hctx_cwnd = val;
}
+static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
+{
+ ccid2_pr_debug("change SRTT to %ld\n", val);
+ hctx->ccid2hctx_srtt = val;
+}
+
+static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
+{
+ hctx->ccid2hctx_pipe = val;
+}
+
static void ccid2_start_rto_timer(struct sock *sk);
static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -194,11 +239,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
ccid2_start_rto_timer(sk);
/* adjust pipe, cwnd etc */
- hctx->ccid2hctx_pipe = 0;
+ ccid2_change_pipe(hctx, 0);
hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
if (hctx->ccid2hctx_ssthresh < 2)
hctx->ccid2hctx_ssthresh = 2;
- ccid2_change_cwnd(sk, 1);
+ ccid2_change_cwnd(hctx, 1);
/* clear state about stuff we sent */
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
@@ -233,13 +278,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ struct ccid2_seq *next;
u64 seq;
ccid2_hc_tx_check_sanity(hctx);
BUG_ON(!hctx->ccid2hctx_sendwait);
hctx->ccid2hctx_sendwait = 0;
- hctx->ccid2hctx_pipe++;
+ ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
BUG_ON(hctx->ccid2hctx_pipe < 0);
/* There is an issue. What if another packet is sent between
@@ -252,15 +298,23 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
hctx->ccid2hctx_seqh->ccid2s_seq = seq;
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
- hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
- ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
- hctx->ccid2hctx_pipe);
+ next = hctx->ccid2hctx_seqh->ccid2s_next;
+ /* check if we need to alloc more space */
+ if (next == hctx->ccid2hctx_seqt) {
+ int rc;
+
+ ccid2_pr_debug("allocating more space in history\n");
+ rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL);
+ BUG_ON(rc); /* XXX what do we do? */
- if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) {
- /* XXX allocate more space */
- WARN_ON(1);
+ next = hctx->ccid2hctx_seqh->ccid2s_next;
+ BUG_ON(next == hctx->ccid2hctx_seqt);
}
+ hctx->ccid2hctx_seqh = next;
+
+ ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
+ hctx->ccid2hctx_pipe);
hctx->ccid2hctx_sent++;
@@ -296,7 +350,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
ccid2_start_rto_timer(sk);
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
ccid2_pr_debug("Sent: seq=%llu\n", seq);
do {
@@ -399,7 +453,7 @@ static inline void ccid2_new_ack(struct sock *sk,
/* increase every 2 acks */
hctx->ccid2hctx_ssacks++;
if (hctx->ccid2hctx_ssacks == 2) {
- ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+ ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
hctx->ccid2hctx_ssacks = 0;
*maxincr = *maxincr - 1;
}
@@ -412,26 +466,28 @@ static inline void ccid2_new_ack(struct sock *sk,
hctx->ccid2hctx_acks++;
if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
- ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+ ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
hctx->ccid2hctx_acks = 0;
}
}
/* update RTO */
if (hctx->ccid2hctx_srtt == -1 ||
- (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) {
- unsigned long r = jiffies - seqp->ccid2s_sent;
+ time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
+ unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
int s;
/* first measurement */
if (hctx->ccid2hctx_srtt == -1) {
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
r, jiffies, seqp->ccid2s_seq);
- hctx->ccid2hctx_srtt = r;
+ ccid2_change_srtt(hctx, r);
hctx->ccid2hctx_rttvar = r >> 1;
} else {
/* RTTVAR */
long tmp = hctx->ccid2hctx_srtt - r;
+ long srtt;
+
if (tmp < 0)
tmp *= -1;
@@ -441,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk,
hctx->ccid2hctx_rttvar += tmp;
/* SRTT */
- hctx->ccid2hctx_srtt *= 7;
- hctx->ccid2hctx_srtt >>= 3;
+ srtt = hctx->ccid2hctx_srtt;
+ srtt *= 7;
+ srtt >>= 3;
tmp = r >> 3;
- hctx->ccid2hctx_srtt += tmp;
+ srtt += tmp;
+ ccid2_change_srtt(hctx, srtt);
}
s = hctx->ccid2hctx_rttvar << 2;
/* clock granularity is 1 when based on jiffies */
@@ -480,13 +538,29 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
- hctx->ccid2hctx_pipe--;
+ ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
BUG_ON(hctx->ccid2hctx_pipe < 0);
if (hctx->ccid2hctx_pipe == 0)
ccid2_hc_tx_kill_rto_timer(sk);
}
+static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
+ struct ccid2_seq *seqp)
+{
+ if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
+ ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
+ return;
+ }
+
+ hctx->ccid2hctx_last_cong = jiffies;
+
+ ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
+ hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
+ if (hctx->ccid2hctx_ssthresh < 2)
+ hctx->ccid2hctx_ssthresh = 2;
+}
+
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -497,7 +571,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
unsigned char veclen;
int offset = 0;
int done = 0;
- int loss = 0;
unsigned int maxincr = 0;
ccid2_hc_tx_check_sanity(hctx);
@@ -583,15 +656,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* run length
*/
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
- const u8 state = (*vector &
- DCCP_ACKVEC_STATE_MASK) >> 6;
+ const u8 state = *vector &
+ DCCP_ACKVEC_STATE_MASK;
/* new packet received or marked */
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
!seqp->ccid2s_acked) {
if (state ==
DCCP_ACKVEC_STATE_ECN_MARKED) {
- loss = 1;
+ ccid2_congestion_event(hctx,
+ seqp);
} else
ccid2_new_ack(sk, seqp,
&maxincr);
@@ -643,7 +717,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* check for lost packets */
while (1) {
if (!seqp->ccid2s_acked) {
- loss = 1;
+ ccid2_pr_debug("Packet lost: %llu\n",
+ seqp->ccid2s_seq);
+ /* XXX need to traverse from tail -> head in
+ * order to detect multiple congestion events in
+ * one ack vector.
+ */
+ ccid2_congestion_event(hctx, seqp);
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->ccid2hctx_seqt)
@@ -662,53 +742,33 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
}
- if (loss) {
- /* XXX do bit shifts guarantee a 0 as the new bit? */
- ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
- hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
- if (hctx->ccid2hctx_ssthresh < 2)
- hctx->ccid2hctx_ssthresh = 2;
- }
-
ccid2_hc_tx_check_sanity(hctx);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
- int seqcount = ccid2_seq_len;
- int i;
- /* XXX init variables with proper values */
- hctx->ccid2hctx_cwnd = 1;
- hctx->ccid2hctx_ssthresh = 10;
+ ccid2_change_cwnd(hctx, 1);
+ /* Initialize ssthresh to infinity. This means that we will exit the
+ * initial slow-start after the first packet loss. This is what we
+ * want.
+ */
+ hctx->ccid2hctx_ssthresh = ~0;
hctx->ccid2hctx_numdupack = 3;
+ hctx->ccid2hctx_seqbufc = 0;
/* XXX init ~ to window size... */
- hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) *
- seqcount, gfp_any());
- if (hctx->ccid2hctx_seqbuf == NULL)
+ if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
return -ENOMEM;
- for (i = 0; i < (seqcount - 1); i++) {
- hctx->ccid2hctx_seqbuf[i].ccid2s_next =
- &hctx->ccid2hctx_seqbuf[i + 1];
- hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
- &hctx->ccid2hctx_seqbuf[i];
- }
- hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
- hctx->ccid2hctx_seqbuf;
- hctx->ccid2hctx_seqbuf->ccid2s_prev =
- &hctx->ccid2hctx_seqbuf[seqcount - 1];
-
- hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf;
- hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
hctx->ccid2hctx_sent = 0;
hctx->ccid2hctx_rto = 3 * HZ;
- hctx->ccid2hctx_srtt = -1;
+ ccid2_change_srtt(hctx, -1);
hctx->ccid2hctx_rttvar = -1;
hctx->ccid2hctx_lastrtt = 0;
hctx->ccid2hctx_rpdupack = -1;
+ hctx->ccid2hctx_last_cong = jiffies;
hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
@@ -721,10 +781,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
static void ccid2_hc_tx_exit(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ int i;
ccid2_hc_tx_kill_rto_timer(sk);
- kfree(hctx->ccid2hctx_seqbuf);
- hctx->ccid2hctx_seqbuf = NULL;
+
+ for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
+ kfree(hctx->ccid2hctx_seqbuf[i]);
+ hctx->ccid2hctx_seqbufc = 0;
}
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -745,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
static struct ccid_operations ccid2 = {
- .ccid_id = 2,
+ .ccid_id = DCCPC_CCID2,
.ccid_name = "ccid2",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 451a87464fa5..5b2ef4acb300 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,6 +35,9 @@ struct ccid2_seq {
struct ccid2_seq *ccid2s_next;
};
+#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_MAX 128
+
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
*
* @ccid2hctx_ssacks - ACKs recv in slow start
@@ -50,10 +53,11 @@ struct ccid2_hc_tx_sock {
int ccid2hctx_cwnd;
int ccid2hctx_ssacks;
int ccid2hctx_acks;
- int ccid2hctx_ssthresh;
+ unsigned int ccid2hctx_ssthresh;
int ccid2hctx_pipe;
int ccid2hctx_numdupack;
- struct ccid2_seq *ccid2hctx_seqbuf;
+ struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
+ int ccid2hctx_seqbufc;
struct ccid2_seq *ccid2hctx_seqh;
struct ccid2_seq *ccid2hctx_seqt;
long ccid2hctx_rto;
@@ -67,6 +71,7 @@ struct ccid2_hc_tx_sock {
u64 ccid2hctx_rpseq;
int ccid2hctx_rpdupack;
int ccid2hctx_sendwait;
+ unsigned long ccid2hctx_last_cong;
};
struct ccid2_hc_rx_sock {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index b4a51d0355a5..67d2dc0e7c67 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -2,7 +2,7 @@
* net/dccp/ccids/ccid3.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005-6 Ian McDonald <imcdnzl@gmail.com>
+ * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* An implementation of the DCCP protocol
*
@@ -34,7 +34,6 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/config.h>
#include "../ccid.h"
#include "../dccp.h"
#include "lib/packet_history.h"
@@ -343,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
new_packet->dccphtx_ccval =
DCCP_SKB_CB(skb)->dccpd_ccval =
hctx->ccid3hctx_last_win_count;
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+ hctx->ccid3hctx_t_ipi);
}
out:
return rc;
@@ -414,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
if (len > 0) {
- hctx->ccid3hctx_t_nom = now;
+ timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+ hctx->ccid3hctx_t_ipi);
ccid3_calc_new_t_ipi(hctx);
ccid3_calc_new_delta(hctx);
timeval_add_usecs(&hctx->ccid3hctx_t_nom,
@@ -758,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
}
hcrx->ccid3hcrx_tstamp_last_feedback = now;
- hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
- hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
+ hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval;
hcrx->ccid3hcrx_bytes_recv = 0;
/* Convert to multiples of 10us */
@@ -783,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return 0;
- DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+ DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
if (dccp_packet_without_ack(skb))
return 0;
@@ -855,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
interval = 1;
}
found:
+ if (!tail) {
+ LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
+ __FUNCTION__);
+ return ~0;
+ }
rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
dccp_role(sk), sk, rtt);
@@ -865,9 +871,20 @@ found:
delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
+ if (x_recv == 0)
+ x_recv = hcrx->ccid3hcrx_x_recv;
+
tmp1 = (u64)x_recv * (u64)rtt;
do_div(tmp1,10000000);
tmp2 = (u32)tmp1;
+
+ if (!tmp2) {
+ LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
+ "%s: x_recv = %u, rtt =%u\n",
+ __FUNCTION__, x_recv, rtt);
+ return ~0;
+ }
+
fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
/* do not alter order above or you will get overflow on 32 bit */
p = tfrc_calc_x_reverse_lookup(fval);
@@ -883,31 +900,101 @@ found:
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+ struct dccp_li_hist_entry *head;
+ u64 seq_temp;
- if (seq_loss != DCCP_MAX_SEQNO + 1 &&
- list_empty(&hcrx->ccid3hcrx_li_hist)) {
- struct dccp_li_hist_entry *li_tail;
+ if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+ if (!dccp_li_hist_interval_new(ccid3_li_hist,
+ &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+ return;
- li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
- &hcrx->ccid3hcrx_li_hist,
- seq_loss, win_loss);
- if (li_tail == NULL)
+ head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+ struct dccp_li_hist_entry, dccplih_node);
+ head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+ } else {
+ struct dccp_li_hist_entry *entry;
+ struct list_head *tail;
+
+ head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+ struct dccp_li_hist_entry, dccplih_node);
+ /* FIXME win count check removed as was wrong */
+ /* should make this check with receive history */
+ /* and compare there as per section 10.2 of RFC4342 */
+
+ /* new loss event detected */
+ /* calculate last interval length */
+ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+ entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+
+ if (entry == NULL) {
+ printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
+ dump_stack();
return;
- li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
- } else
- LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of "
- "interval\n", __FUNCTION__);
+ }
+
+ list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+
+ tail = hcrx->ccid3hcrx_li_hist.prev;
+ list_del(tail);
+ kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+
+ /* Create the newest interval */
+ entry->dccplih_seqno = seq_loss;
+ entry->dccplih_interval = seq_temp;
+ entry->dccplih_win_count = win_loss;
+ }
}
-static void ccid3_hc_rx_detect_loss(struct sock *sk)
+static int ccid3_hc_rx_detect_loss(struct sock *sk,
+ struct dccp_rx_hist_entry *packet)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- u8 win_loss;
- const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist,
- &win_loss);
+ struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+ u64 seqno = packet->dccphrx_seqno;
+ u64 tmp_seqno;
+ int loss = 0;
+ u8 ccval;
+
+
+ tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+
+ if (!rx_hist ||
+ follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+ hcrx->ccid3hcrx_seqno_nonloss = seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+ goto detect_out;
+ }
+
- ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+ while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+ > TFRC_RECV_NUM_LATE_LOSS) {
+ loss = 1;
+ ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
+ hcrx->ccid3hcrx_ccval_nonloss);
+ tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+ dccp_inc_seqno(&tmp_seqno);
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ dccp_inc_seqno(&tmp_seqno);
+ while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
+ tmp_seqno, &ccval)) {
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = ccval;
+ dccp_inc_seqno(&tmp_seqno);
+ }
+ }
+
+ /* FIXME - this code could be simplified with above while */
+ /* but works at moment */
+ if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+ hcrx->ccid3hcrx_seqno_nonloss = seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+ }
+
+detect_out:
+ dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+ &hcrx->ccid3hcrx_li_hist, packet,
+ hcrx->ccid3hcrx_seqno_nonloss);
+ return loss;
}
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -917,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct dccp_rx_hist_entry *packet;
struct timeval now;
u8 win_count;
- u32 p_prev, r_sample, t_elapsed;
- int ins;
+ u32 p_prev, rtt_prev, r_sample, t_elapsed;
+ int loss;
BUG_ON(hcrx == NULL ||
!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
@@ -933,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
case DCCP_PKT_DATAACK:
if (opt_recv->dccpor_timestamp_echo == 0)
break;
- p_prev = hcrx->ccid3hcrx_rtt;
+ rtt_prev = hcrx->ccid3hcrx_rtt;
dccp_timestamp(sk, &now);
timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
r_sample = timeval_usecs(&now);
@@ -952,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
r_sample / 10;
- if (p_prev != hcrx->ccid3hcrx_rtt)
- ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+ if (rtt_prev != hcrx->ccid3hcrx_rtt)
+ ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
dccp_role(sk), hcrx->ccid3hcrx_rtt,
opt_recv->dccpor_elapsed_time);
break;
@@ -974,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
win_count = packet->dccphrx_ccval;
- ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist, packet);
+ loss = ccid3_hc_rx_detect_loss(sk, packet);
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
return;
@@ -992,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
case TFRC_RSTATE_DATA:
hcrx->ccid3hcrx_bytes_recv += skb->len -
dccp_hdr(skb)->dccph_doff * 4;
- if (ins != 0)
+ if (loss)
break;
dccp_timestamp(sk, &now);
@@ -1013,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state));
- ccid3_hc_rx_detect_loss(sk);
p_prev = hcrx->ccid3hcrx_p;
/* Calculate loss event rate */
@@ -1023,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* Scaling up by 1000000 as fixed decimal */
if (i_mean != 0)
hcrx->ccid3hcrx_p = 1000000 / i_mean;
+ } else {
+ printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
+ dump_stack();
}
if (hcrx->ccid3hcrx_p > p_prev) {
@@ -1152,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
}
static struct ccid_operations ccid3 = {
- .ccid_id = 3,
+ .ccid_id = DCCPC_CCID3,
.ccid_name = "ccid3",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
@@ -1231,7 +1319,7 @@ static __exit void ccid3_module_exit(void)
}
module_exit(ccid3_module_exit);
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
"Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index f18b96d4e5a2..0a2cb7536d26 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,13 +1,13 @@
/*
* net/dccp/ccids/ccid3.h
*
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
- * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
@@ -36,7 +36,6 @@
#ifndef _DCCP_CCID3_H_
#define _DCCP_CCID3_H_
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -121,9 +120,10 @@ struct ccid3_hc_rx_sock {
#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
- u64 ccid3hcrx_seqno_last_counter:48,
+ u64 ccid3hcrx_seqno_nonloss:48,
+ ccid3hcrx_ccval_nonloss:4,
ccid3hcrx_state:8,
- ccid3hcrx_last_counter:4;
+ ccid3hcrx_ccval_last_counter:4;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 4c01a54143ad..906c81ab9d4f 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -2,7 +2,7 @@
* net/dccp/ccids/lib/loss_interval.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
@@ -11,8 +11,8 @@
* (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
+#include <net/sock.h>
#include "loss_interval.h"
@@ -91,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
u32 w_tot = 0;
list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
- if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+ if (li_entry->dccplih_interval != ~0) {
i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
w_tot += dccp_li_hist_w[i];
+ if (i != 0)
+ i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
}
- if (i != 0)
- i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
break;
@@ -108,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
i_tot = max(i_tot0, i_tot1);
- /* FIXME: Why do we do this? -Ian McDonald */
- if (i_tot * 4 < w_tot)
- i_tot = w_tot * 4;
+ if (!w_tot) {
+ LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+ return 1;
+ }
- return i_tot * 4 / w_tot;
+ return i_tot / w_tot;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
-struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
- struct list_head *list,
- const u64 seq_loss,
- const u8 win_loss)
+int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+ struct list_head *list, const u64 seq_loss, const u8 win_loss)
{
- struct dccp_li_hist_entry *tail = NULL, *entry;
+ struct dccp_li_hist_entry *entry;
int i;
- for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
+ for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
if (entry == NULL) {
dccp_li_hist_purge(hist, list);
- return NULL;
+ dump_stack();
+ return 0;
}
- if (tail == NULL)
- tail = entry;
+ entry->dccplih_interval = ~0;
list_add(&entry->dccplih_node, list);
}
entry->dccplih_seqno = seq_loss;
entry->dccplih_win_count = win_loss;
- return tail;
+ return 1;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 417d9d82df3e..0ae85f0340b2 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -4,7 +4,7 @@
* net/dccp/ccids/lib/loss_interval.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
@@ -13,7 +13,6 @@
* any later version.
*/
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/time.h>
@@ -53,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
-extern struct dccp_li_hist_entry *
- dccp_li_hist_interval_new(struct dccp_li_hist *hist,
- struct list_head *list,
- const u64 seq_loss,
- const u8 win_loss);
+extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+ struct list_head *list, const u64 seq_loss, const u8 win_loss);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index d3f9d2053830..b876c9c81c65 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,13 +1,13 @@
/*
- * net/dccp/packet_history.h
+ * net/dccp/packet_history.c
*
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
- * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
@@ -34,7 +34,6 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/string.h>
@@ -113,64 +112,27 @@ struct dccp_rx_hist_entry *
EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
-int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
- struct dccp_rx_hist_entry *packet)
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno)
{
- struct dccp_rx_hist_entry *entry, *next, *iter;
+ struct dccp_rx_hist_entry *entry, *next;
u8 num_later = 0;
- iter = dccp_rx_hist_head(rx_list);
- if (iter == NULL)
- dccp_rx_hist_add_entry(rx_list, packet);
- else {
- const u64 seqno = packet->dccphrx_seqno;
-
- if (after48(seqno, iter->dccphrx_seqno))
- dccp_rx_hist_add_entry(rx_list, packet);
- else {
- if (dccp_rx_hist_entry_data_packet(iter))
- num_later = 1;
-
- list_for_each_entry_continue(iter, rx_list,
- dccphrx_node) {
- if (after48(seqno, iter->dccphrx_seqno)) {
- dccp_rx_hist_add_entry(&iter->dccphrx_node,
- packet);
- goto trim_history;
- }
-
- if (dccp_rx_hist_entry_data_packet(iter))
- num_later++;
+ list_add(&packet->dccphrx_node, rx_list);
- if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
- dccp_rx_hist_entry_delete(hist, packet);
- return 1;
- }
- }
-
- if (num_later < TFRC_RECV_NUM_LATE_LOSS)
- dccp_rx_hist_add_entry(rx_list, packet);
- /*
- * FIXME: else what? should we destroy the packet
- * like above?
- */
- }
- }
-
-trim_history:
- /*
- * Trim history (remove all packets after the NUM_LATE_LOSS + 1
- * data packets)
- */
num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
if (!list_empty(li_list)) {
list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
if (num_later == 0) {
- list_del_init(&entry->dccphrx_node);
- dccp_rx_hist_entry_delete(hist, entry);
+ if (after48(nonloss_seqno,
+ entry->dccphrx_seqno)) {
+ list_del_init(&entry->dccphrx_node);
+ dccp_rx_hist_entry_delete(hist, entry);
+ }
} else if (dccp_rx_hist_entry_data_packet(entry))
--num_later;
}
@@ -218,94 +180,10 @@ trim_history:
--num_later;
}
}
-
- return 0;
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
-u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
- struct list_head *li_list, u8 *win_loss)
-{
- struct dccp_rx_hist_entry *entry, *next, *packet;
- struct dccp_rx_hist_entry *a_loss = NULL;
- struct dccp_rx_hist_entry *b_loss = NULL;
- u64 seq_loss = DCCP_MAX_SEQNO + 1;
- u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
-
- list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
- if (num_later == 0) {
- b_loss = entry;
- break;
- } else if (dccp_rx_hist_entry_data_packet(entry))
- --num_later;
- }
-
- if (b_loss == NULL)
- goto out;
-
- num_later = 1;
- list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
- if (num_later == 0) {
- a_loss = entry;
- break;
- } else if (dccp_rx_hist_entry_data_packet(entry))
- --num_later;
- }
-
- if (a_loss == NULL) {
- if (list_empty(li_list)) {
- /* no loss event have occured yet */
- LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
- "comparing to initial seqno\n",
- __FUNCTION__);
- goto out;
- } else {
- LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
- __FUNCTION__);
- goto out;
- }
- }
-
- /* Locate a lost data packet */
- entry = packet = b_loss;
- list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
- u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
- packet->dccphrx_seqno);
-
- if (delta != 0) {
- if (dccp_rx_hist_entry_data_packet(packet))
- --delta;
- /*
- * FIXME: check this, probably this % usage is because
- * in earlier drafts the ndp count was just 8 bits
- * long, but now it cam be up to 24 bits long.
- */
-#if 0
- if (delta % DCCP_NDP_LIMIT !=
- (packet->dccphrx_ndp -
- entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
-#endif
- if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
- seq_loss = entry->dccphrx_seqno;
- dccp_inc_seqno(&seq_loss);
- }
- }
- packet = entry;
- if (packet == a_loss)
- break;
- }
-out:
- if (seq_loss != DCCP_MAX_SEQNO + 1)
- *win_loss = a_loss->dccphrx_ccval;
- else
- *win_loss = 0; /* Paranoia */
-
- return seq_loss;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
-
struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
{
struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
@@ -366,6 +244,25 @@ struct dccp_tx_hist_entry *
EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
+int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval)
+{
+ struct dccp_rx_hist_entry *packet = NULL, *entry;
+
+ list_for_each_entry(entry, list, dccphrx_node)
+ if (entry->dccphrx_seqno == seq) {
+ packet = entry;
+ break;
+ }
+
+ if (packet)
+ *ccval = packet->dccphrx_ccval;
+
+ return packet != NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
+
void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
struct list_head *list,
struct dccp_tx_hist_entry *packet)
@@ -392,7 +289,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
"Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC library");
MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 122e96737ff6..067cf1c85a37 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,13 +1,13 @@
/*
* net/dccp/packet_history.h
*
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
- * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
@@ -37,7 +37,6 @@
#ifndef _DCCP_PKT_HIST_
#define _DCCP_PKT_HIST_
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/time.h>
@@ -107,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
extern struct dccp_tx_hist_entry *
dccp_tx_hist_find_entry(const struct list_head *list,
const u64 seq);
+extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval);
static inline void dccp_tx_hist_add_entry(struct list_head *list,
struct dccp_tx_hist_entry *entry)
@@ -165,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
struct list_head *list);
-static inline void dccp_rx_hist_add_entry(struct list_head *list,
- struct dccp_rx_hist_entry *entry)
-{
- list_add(&entry->dccphrx_node, list);
-}
-
static inline struct dccp_rx_hist_entry *
dccp_rx_hist_head(struct list_head *list)
{
@@ -189,10 +184,11 @@ static inline int
entry->dccphrx_type == DCCP_PKT_DATAACK;
}
-extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
- struct dccp_rx_hist_entry *packet);
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno);
extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
struct list_head *li_list, u8 *win_loss);
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 130c4c40cfe3..45f30f59ea2a 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -4,7 +4,7 @@
* net/dccp/ccids/lib/tfrc.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index add3cae65e2d..44076e0c6591 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -2,7 +2,7 @@
* net/dccp/ccids/lib/tfrc_equation.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
@@ -12,7 +12,6 @@
* (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/div64.h>
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1fe509148689..0a21be437ed3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -5,14 +5,13 @@
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <net/snmp.h>
#include <net/sock.h>
@@ -82,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
return after48(seq1, seq2) ? seq1 : seq2;
}
+/* is seq1 next seqno after seq2 */
+static inline int follows48(const u64 seq1, const u64 seq2)
+{
+ int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
+
+ return diff==1;
+}
+
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
@@ -123,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
-extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_xmit(struct sock *sk, int block);
extern void dccp_write_space(struct sock *sk);
extern void dccp_init_xmit_timers(struct sock *sk);
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 0f25dc395967..0f3745585a94 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -9,7 +9,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/inet_diag.h>
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index b39e2a597889..a1b0682ee77c 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include "dccp.h"
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 6048373c7186..cee553d416ca 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -26,4 +26,11 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk);
extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
extern int dccp_feat_init(struct dccp_minisock *dmsk);
+extern int dccp_feat_default_sequence_window;
+extern int dccp_feat_default_rx_ccid;
+extern int dccp_feat_default_tx_ccid;
+extern int dccp_feat_default_ack_ratio;
+extern int dccp_feat_default_send_ack_vector;
+extern int dccp_feat_default_send_ndp_count;
+
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index bfc53665516b..7f9dc6ac58c9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f2c011fd2ba1..bf692c1c116f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/icmp.h>
#include <linux/module.h>
@@ -51,15 +50,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct dccp_sock *dp = dccp_sk(sk);
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
- u32 daddr, nexthop;
+ __be32 daddr, nexthop;
int tmp;
int err;
dp->dccps_role = DCCP_ROLE_CLIENT;
- if (dccp_service_not_initialized(sk))
- return -EPROTO;
-
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -502,11 +498,13 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
dccp_openreq_init(req, &dp, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
ireq = inet_rsk(req);
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
- req->rcv_wnd = 100; /* Fake, option parsing will get the
- right value */
+ req->rcv_wnd = dccp_feat_default_sequence_window;
ireq->opt = NULL;
/*
@@ -607,10 +605,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req != NULL)
return dccp_check_req(sk, skb, req, prev);
- nsk = __inet_lookup_established(&dccp_hashinfo,
- iph->saddr, dh->dccph_sport,
- iph->daddr, ntohs(dh->dccph_dport),
- inet_iif(skb));
+ nsk = inet_lookup_established(&dccp_hashinfo,
+ iph->saddr, dh->dccph_sport,
+ iph->daddr, dh->dccph_dport,
+ inet_iif(skb));
if (nsk != NULL) {
if (nsk->sk_state != DCCP_TIME_WAIT) {
bh_lock_sock(nsk);
@@ -680,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
}
};
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
@@ -923,7 +922,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
* Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
skb->nh.iph->saddr, dh->dccph_sport,
- skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+ skb->nh.iph->daddr, dh->dccph_dport,
inet_iif(skb));
/*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 65e2ab0886e6..7a47399cf31f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -12,7 +12,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/xfrm.h>
@@ -32,6 +31,7 @@
#include "dccp.h"
#include "ipv6.h"
+#include "feat.h"
/* Socket used for sending RSTs and ACKs */
static struct socket *dccp_v6_ctl_socket;
@@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = usin->sin6_port;
fl.fl_ip_sport = inet->sport;
+ security_sk_classify_flow(sk, &fl);
if (np->opt != NULL && np->opt->srcrt != NULL) {
const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -230,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
ipv6_addr_copy(&np->saddr, saddr);
inet->rcv_saddr = LOOPBACK4_IPV6;
- ip6_dst_store(sk, dst, NULL);
+ __ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
if (np->opt != NULL)
@@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet->dport;
fl.fl_ip_sport = inet->sport;
+ security_sk_classify_flow(sk, &fl);
err = ip6_dst_lookup(sk, &dst, &fl);
if (err) {
@@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
fl.oif = ireq6->iif;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_req_classify_flow(req, &fl);
if (dst == NULL) {
opt = np->opt;
@@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
fl.oif = inet6_iif(rxskb);
fl.fl_ip_dport = dh->dccph_dport;
fl.fl_ip_sport = dh->dccph_sport;
+ security_skb_classify_flow(rxskb, &fl);
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
@@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
fl.oif = inet6_iif(rxskb);
fl.fl_ip_dport = dh->dccph_dport;
fl.fl_ip_sport = dh->dccph_sport;
+ security_req_classify_flow(req, &fl);
if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -704,12 +709,14 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
dccp_openreq_init(req, &dp, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
ireq6 = inet6_rsk(req);
ireq = inet_rsk(req);
ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
- req->rcv_wnd = 100; /* Fake, option parsing will get the
- right value */
+ req->rcv_wnd = dccp_feat_default_sequence_window;
ireq6->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
@@ -843,6 +850,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_sk_classify_flow(sk, &fl);
if (ip6_dst_lookup(sk, &dst, &fl))
goto out;
@@ -864,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
* comment in that function for the gory details. -acme
*/
- ip6_dst_store(newsk, dst, NULL);
+ __ip6_dst_store(newsk, dst, NULL, NULL);
newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
NETIF_F_TSO);
newdp6 = (struct dccp6_sock *)newsk;
@@ -962,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return dccp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard;
/*
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index e4d4e9309270..6eef81fdbe56 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -11,7 +11,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/ipv6.h>
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index c0349e5b0551..9045438d6b36 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
diff --git a/net/dccp/options.c b/net/dccp/options.c
index e9feb2a0c770..07a34696ac97 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -4,14 +4,13 @@
* An implementation of the DCCP protocol
* Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -30,6 +29,8 @@ int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
+EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window);
+
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7409e4a3abdf..7102e3aed4ca 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -199,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
while (1) {
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ if (sk->sk_err)
goto do_error;
if (!*timeo)
goto do_nonblock;
@@ -235,37 +234,72 @@ do_interrupted:
goto out;
}
-int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
+static void dccp_write_xmit_timer(unsigned long data) {
+ struct sock *sk = (struct sock *)data;
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk))
+ sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+ else
+ dccp_write_xmit(sk, 0);
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+void dccp_write_xmit(struct sock *sk, int block)
{
- const struct dccp_sock *dp = dccp_sk(sk);
- int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb;
+ long timeo = 30000; /* If a packet is taking longer than 2 secs
+ we have other issues */
+
+ while ((skb = skb_peek(&sk->sk_write_queue))) {
+ int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
skb->len);
- if (err > 0)
- err = dccp_wait_for_ccid(sk, skb, timeo);
+ if (err > 0) {
+ if (!block) {
+ sk_reset_timer(sk, &dp->dccps_xmit_timer,
+ msecs_to_jiffies(err)+jiffies);
+ break;
+ } else
+ err = dccp_wait_for_ccid(sk, skb, &timeo);
+ if (err) {
+ printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
+ " %d\n", __FUNCTION__, err);
+ dump_stack();
+ }
+ }
- if (err == 0) {
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- const int len = skb->len;
+ skb_dequeue(&sk->sk_write_queue);
+ if (err == 0) {
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ const int len = skb->len;
- if (sk->sk_state == DCCP_PARTOPEN) {
- /* See 8.1.5. Handshake Completion */
- inet_csk_schedule_ack(sk);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ if (sk->sk_state == DCCP_PARTOPEN) {
+ /* See 8.1.5. Handshake Completion */
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
inet_csk(sk)->icsk_rto,
DCCP_RTO_MAX);
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- } else if (dccp_ack_pending(sk))
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- else
- dcb->dccpd_type = DCCP_PKT_DATA;
-
- err = dccp_transmit_skb(sk, skb);
- ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
- } else
- kfree_skb(skb);
-
- return err;
+ dcb->dccpd_type = DCCP_PKT_DATAACK;
+ } else if (dccp_ack_pending(sk))
+ dcb->dccpd_type = DCCP_PKT_DATAACK;
+ else
+ dcb->dccpd_type = DCCP_PKT_DATA;
+
+ err = dccp_transmit_skb(sk, skb);
+ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+ if (err) {
+ printk(KERN_CRIT "%s:err from "
+ "ccid_hc_tx_packet_sent %d\n",
+ __FUNCTION__, err);
+ dump_stack();
+ }
+ } else
+ kfree(skb);
+ }
}
int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -427,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk)
dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
icsk->icsk_retransmits = 0;
+ init_timer(&dp->dccps_xmit_timer);
+ dp->dccps_xmit_timer.data = (unsigned long)sk;
+ dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
}
int dccp_connect(struct sock *sk)
@@ -561,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active)
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
if (active) {
+ dccp_write_xmit(sk, 1);
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, prio));
+ /* FIXME do we need a retransmit timer here? */
} else
dccp_transmit_skb(sk, skb);
}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
new file mode 100644
index 000000000000..146496fce2e2
--- /dev/null
+++ b/net/dccp/probe.c
@@ -0,0 +1,198 @@
+/*
+ * dccp_probe - Observe the DCCP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for DCCP from Stephen Hemminger's code
+ * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/dccp.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/vmalloc.h>
+
+#include "dccp.h"
+#include "ccid.h"
+#include "ccids/ccid3.h"
+
+static int port;
+
+static int bufsize = 64 * 1024;
+
+static const char procname[] = "dccpprobe";
+
+struct {
+ struct kfifo *fifo;
+ spinlock_t lock;
+ wait_queue_head_t wait;
+ struct timeval tstart;
+} dccpw;
+
+static void printl(const char *fmt, ...)
+{
+ va_list args;
+ int len;
+ struct timeval now;
+ char tbuf[256];
+
+ va_start(args, fmt);
+ do_gettimeofday(&now);
+
+ now.tv_sec -= dccpw.tstart.tv_sec;
+ now.tv_usec -= dccpw.tstart.tv_usec;
+ if (now.tv_usec < 0) {
+ --now.tv_sec;
+ now.tv_usec += 1000000;
+ }
+
+ len = sprintf(tbuf, "%lu.%06lu ",
+ (unsigned long) now.tv_sec,
+ (unsigned long) now.tv_usec);
+ len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
+ va_end(args);
+
+ kfifo_put(dccpw.fifo, tbuf, len);
+ wake_up(&dccpw.wait);
+}
+
+static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t size)
+{
+ const struct dccp_minisock *dmsk = dccp_msk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ccid3_hc_tx_sock *hctx;
+
+ if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+ hctx = ccid3_hc_tx_sk(sk);
+ else
+ hctx = NULL;
+
+ if (port == 0 || ntohs(inet->dport) == port ||
+ ntohs(inet->sport) == port) {
+ if (hctx)
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+ else
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+ }
+
+ jprobe_return();
+ return 0;
+}
+
+static struct jprobe dccp_send_probe = {
+ .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, },
+ .entry = (kprobe_opcode_t *)&jdccp_sendmsg,
+};
+
+static int dccpprobe_open(struct inode *inode, struct file *file)
+{
+ kfifo_reset(dccpw.fifo);
+ do_gettimeofday(&dccpw.tstart);
+ return 0;
+}
+
+static ssize_t dccpprobe_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ int error = 0, cnt = 0;
+ unsigned char *tbuf;
+
+ if (!buf || len < 0)
+ return -EINVAL;
+
+ if (len == 0)
+ return 0;
+
+ tbuf = vmalloc(len);
+ if (!tbuf)
+ return -ENOMEM;
+
+ error = wait_event_interruptible(dccpw.wait,
+ __kfifo_len(dccpw.fifo) != 0);
+ if (error)
+ goto out_free;
+
+ cnt = kfifo_get(dccpw.fifo, tbuf, len);
+ error = copy_to_user(buf, tbuf, cnt);
+
+out_free:
+ vfree(tbuf);
+
+ return error ? error : cnt;
+}
+
+static struct file_operations dccpprobe_fops = {
+ .owner = THIS_MODULE,
+ .open = dccpprobe_open,
+ .read = dccpprobe_read,
+};
+
+static __init int dccpprobe_init(void)
+{
+ int ret = -ENOMEM;
+
+ init_waitqueue_head(&dccpw.wait);
+ spin_lock_init(&dccpw.lock);
+ dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
+
+ if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
+ goto err0;
+
+ ret = register_jprobe(&dccp_send_probe);
+ if (ret)
+ goto err1;
+
+ pr_info("DCCP watch registered (port=%d)\n", port);
+ return 0;
+err1:
+ proc_net_remove(procname);
+err0:
+ kfifo_free(dccpw.fifo);
+ return ret;
+}
+module_init(dccpprobe_init);
+
+static __exit void dccpprobe_exit(void)
+{
+ kfifo_free(dccpw.fifo);
+ proc_net_remove(procname);
+ unregister_jprobe(&dccp_send_probe);
+
+}
+module_exit(dccpprobe_exit);
+
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
+MODULE_DESCRIPTION("DCCP snooper");
+MODULE_LICENSE("GPL");
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5317fd3e6691..72cbdcfc2c65 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -9,7 +9,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -218,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
- dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
+ dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
return 0;
@@ -268,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
- /*
- * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
- * before calling listen()
- */
- if (dccp_service_not_initialized(sk))
- return -EPROTO;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
@@ -485,7 +478,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
else
err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
- (struct dccp_so_feat *)
+ (struct dccp_so_feat __user *)
optval);
break;
@@ -494,7 +487,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
else
err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
- (struct dccp_so_feat *)
+ (struct dccp_so_feat __user *)
optval);
break;
@@ -541,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len,
int err = -ENOENT, slen = 0, total_len = sizeof(u32);
lock_sock(sk);
- if (dccp_service_not_initialized(sk))
- goto out;
-
if ((sl = dp->dccps_service_list) != NULL) {
slen = sl->dccpsl_nr * sizeof(u32);
total_len += slen;
@@ -663,17 +653,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (rc != 0)
goto out_discard;
- rc = dccp_write_xmit(sk, skb, &timeo);
- /*
- * XXX we don't use sk_write_queue, so just discard the packet.
- * Current plan however is to _use_ sk_write_queue with
- * an algorith similar to tcp_sendmsg, where the main difference
- * is that in DCCP we have to respect packet boundaries, so
- * no coalescing of skbs.
- *
- * This bug was _quickly_ found & fixed by just looking at an OSTRA
- * generated callgraph 8) -acme
- */
+ skb_queue_tail(&sk->sk_write_queue, skb);
+ dccp_write_xmit(sk,0);
out_release:
release_sock(sk);
return rc ? : len;
@@ -847,6 +828,7 @@ static int dccp_close_state(struct sock *sk)
void dccp_close(struct sock *sk, long timeout)
{
+ struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
int state;
@@ -863,6 +845,8 @@ void dccp_close(struct sock *sk, long timeout)
goto adjudge_to_death;
}
+ sk_stop_timer(sk, &dp->dccps_xmit_timer);
+
/*
* We need to flush the recv. buffs. We do this only on the
* descriptor close, not protocol-sourced closes, because the
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 64c89e9c229e..38bc157876f3 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -9,21 +9,14 @@
* as published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
+#include "feat.h"
#ifndef CONFIG_SYSCTL
#error This file should not be compiled without CONFIG_SYSCTL defined
#endif
-extern int dccp_feat_default_sequence_window;
-extern int dccp_feat_default_rx_ccid;
-extern int dccp_feat_default_tx_ccid;
-extern int dccp_feat_default_ack_ratio;
-extern int dccp_feat_default_send_ack_vector;
-extern int dccp_feat_default_send_ndp_count;
-
static struct ctl_table dccp_default_table[] = {
{
.ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW,
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 5244415e5f18..8447742f5615 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 92f2ec46fd22..36e72cb145b0 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -27,6 +27,7 @@ config DECNET
config DECNET_ROUTER
bool "DECnet: router support (EXPERIMENTAL)"
depends on DECNET && EXPERIMENTAL
+ select FIB_RULES
---help---
Add support for turning your DECnet Endnode into a level 1 or 2
router. This is an experimental, but functional option. If you
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2b289ef20ab3..70e027375682 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -99,7 +99,6 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
dn_bind fixes
*******************************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -131,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <linux/poll.h>
#include <net/neighbour.h>
#include <net/dst.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_nsp.h>
#include <net/dn_dev.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index a26ff9f44576..01861feb608d 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -24,7 +24,6 @@
* devices. All mtu based now.
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -35,6 +34,7 @@
#include <linux/seq_file.h>
#include <linux/timer.h>
#include <linux/string.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>
@@ -46,6 +46,7 @@
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_dev.h>
#include <net/dn_route.h>
@@ -414,11 +415,7 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
{
struct dn_ifaddr *ifa;
- ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
-
- if (ifa) {
- memset(ifa, 0, sizeof(*ifa));
- }
+ ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
return ifa;
}
@@ -749,20 +746,23 @@ rtattr_failure:
static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+ int payload = sizeof(struct ifaddrmsg) + 128;
+ int err = -ENOBUFS;
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb) {
- netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
- return;
- }
- if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+ skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+ err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
}
static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1106,10 +1106,9 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
return NULL;
*err = -ENOBUFS;
- if ((dn_db = kmalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
+ if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
return NULL;
- memset(dn_db, 0, sizeof(struct dn_dev));
memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
smp_wmb();
dev->dn_ptr = dn_db;
@@ -1423,8 +1422,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
[RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, },
[RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
.dumpit = dn_fib_dump, },
- [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, },
- [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, },
[RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, },
#else
[RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index bd4ce8681a12..1cf010124ec5 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -17,7 +17,6 @@
* this code was copied from it.
*
*/
-#include <linux/config.h>
#include <linux/string.h>
#include <linux/net.h>
#include <linux/socket.h>
@@ -35,6 +34,7 @@
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_route.h>
#include <net/dn_fib.h>
@@ -55,11 +55,9 @@
#define endfor_nexthops(fi) }
-extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
static DEFINE_SPINLOCK(dn_fib_multipath_lock);
static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_RWLOCK(dn_fib_info_lock);
+static DEFINE_SPINLOCK(dn_fib_info_lock);
static struct
{
@@ -80,6 +78,9 @@ static struct
[RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
};
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+static int dn_fib_sync_up(struct net_device *dev);
+
void dn_fib_free_info(struct dn_fib_info *fi)
{
if (fi->fib_dead == 0) {
@@ -97,7 +98,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
void dn_fib_release_info(struct dn_fib_info *fi)
{
- write_lock(&dn_fib_info_lock);
+ spin_lock(&dn_fib_info_lock);
if (fi && --fi->fib_treeref == 0) {
if (fi->fib_next)
fi->fib_next->fib_prev = fi->fib_prev;
@@ -108,7 +109,7 @@ void dn_fib_release_info(struct dn_fib_info *fi)
fi->fib_dead = 1;
dn_fib_info_put(fi);
}
- write_unlock(&dn_fib_info_lock);
+ spin_unlock(&dn_fib_info_lock);
}
static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
@@ -284,11 +285,10 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
goto err_inval;
}
- fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
+ fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
err = -ENOBUFS;
if (fi == NULL)
goto failure;
- memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct dn_fib_nh));
fi->fib_protocol = r->rtm_protocol;
fi->fib_nhs = nhs;
@@ -380,13 +380,13 @@ link_it:
fi->fib_treeref++;
atomic_inc(&fi->fib_clntref);
- write_lock(&dn_fib_info_lock);
+ spin_lock(&dn_fib_info_lock);
fi->fib_next = dn_fib_info_list;
fi->fib_prev = NULL;
if (dn_fib_info_list)
dn_fib_info_list->fib_prev = fi;
dn_fib_info_list = fi;
- write_unlock(&dn_fib_info_lock);
+ spin_unlock(&dn_fib_info_lock);
return fi;
err_inval:
@@ -492,7 +492,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
if (attr) {
if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
return -EINVAL;
- if (i != RTA_MULTIPATH && i != RTA_METRICS)
+ if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+ i != RTA_TABLE)
rta[i-1] = (struct rtattr *)RTA_DATA(attr);
}
}
@@ -509,7 +510,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (dn_fib_check_attr(r, rta))
return -EINVAL;
- tb = dn_fib_get_table(r->rtm_table, 0);
+ tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
if (tb)
return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
@@ -525,46 +526,13 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (dn_fib_check_attr(r, rta))
return -EINVAL;
- tb = dn_fib_get_table(r->rtm_table, 1);
+ tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
if (tb)
return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
return -ENOBUFS;
}
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- int t;
- int s_t;
- struct dn_fib_table *tb;
-
- if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
- return dn_cache_dump(skb, cb);
-
- s_t = cb->args[0];
- if (s_t == 0)
- s_t = cb->args[0] = RT_MIN_TABLE;
-
- for(t = s_t; t <= RT_TABLE_MAX; t++) {
- if (t < s_t)
- continue;
- if (t > s_t)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- tb = dn_fib_get_table(t, 0);
- if (tb == NULL)
- continue;
- if (tb->dump(tb, skb, cb) < 0)
- break;
- }
-
- cb->args[0] = t;
-
- return skb->len;
-}
-
static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
{
struct dn_fib_table *tb;
@@ -684,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event,
return NOTIFY_DONE;
}
-int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
@@ -728,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
}
-int dn_fib_sync_up(struct net_device *dev)
+static int dn_fib_sync_up(struct net_device *dev)
{
int ret = 0;
@@ -762,22 +730,6 @@ int dn_fib_sync_up(struct net_device *dev)
return ret;
}
-void dn_fib_flush(void)
-{
- int flushed = 0;
- struct dn_fib_table *tb;
- int id;
-
- for(id = RT_TABLE_MAX; id > 0; id--) {
- if ((tb = dn_fib_get_table(id, 0)) == NULL)
- continue;
- flushed += tb->flush(tb);
- }
-
- if (flushed)
- dn_rt_cache_flush(-1);
-}
-
static struct notifier_block dn_fib_dnaddr_notifier = {
.notifier_call = dn_fib_dnaddr_event,
};
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 66e230c3b328..ff0ebe99137d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -24,7 +24,6 @@
*
*/
-#include <linux/config.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/socket.h>
@@ -581,12 +580,11 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
- memset(s, 0, sizeof(*s));
rc = seq_open(file, &dn_neigh_seq_ops);
if (rc)
goto out_kfree;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index a2ba9db1c376..72ecc6e62ec4 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -45,7 +45,6 @@
GNU General Public License for more details.
*******************************************************************************/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -587,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
goto out;
}
- err = sk_filter(sk, skb, 0);
+ err = sk_filter(sk, skb);
if (err)
goto out;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5abf7057af00..dd0761e3d280 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -55,7 +55,6 @@
GNU General Public License for more details.
*******************************************************************************/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -81,6 +80,7 @@
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_dev.h>
#include <net/dn_nsp.h>
@@ -926,8 +926,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
for(dev_out = dev_base; dev_out; dev_out = dev_out->next) {
if (!dev_out->dn_ptr)
continue;
- if (dn_dev_islocal(dev_out, oldflp->fld_src))
- break;
+ if (!dn_dev_islocal(dev_out, oldflp->fld_src))
+ continue;
+ if ((dev_out->flags & IFF_LOOPBACK) &&
+ oldflp->fld_dst &&
+ !dn_dev_islocal(dev_out, oldflp->fld_dst))
+ continue;
+ break;
}
read_unlock(&dev_base_lock);
if (dev_out == NULL)
@@ -1280,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
dev_hold(out_dev);
if (res.r)
- src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+ src_map = fl.fld_src; /* no NAT support for now */
gateway = DN_FIB_RES_GW(res);
if (res.type == RTN_NAT) {
@@ -1481,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
r->rtm_src_len = 0;
r->rtm_tos = 0;
r->rtm_table = RT_TABLE_MAIN;
+ RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
r->rtm_type = rt->rt_type;
r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
r->rtm_scope = RT_SCOPE_UNIVERSE;
@@ -1605,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
goto out_free;
}
- err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-
- return err;
+ return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
out_free:
kfree_skb(skb);
@@ -1777,14 +1781,9 @@ void __init dn_route_init(void)
{
int i, goal, order;
- dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache",
- sizeof(struct dn_route),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
-
- if (!dn_dst_ops.kmem_cachep)
- panic("DECnet: Failed to allocate dn_dst_cache\n");
-
+ dn_dst_ops.kmem_cachep =
+ kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
init_timer(&dn_route_timer);
dn_route_timer.function = dn_dst_check_expire;
dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 446faafe2065..3e0c882c90bf 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -11,261 +11,213 @@
*
*
* Changes:
+ * Steve Whitehouse <steve@chygwyn.com>
+ * Updated for Thomas Graf's generic rules
*
*/
-#include <linux/config.h>
-#include <linux/string.h>
#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
#include <linux/init.h>
-#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
#include <linux/netdevice.h>
-#include <linux/timer.h>
#include <linux/spinlock.h>
-#include <linux/in_route.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
+static struct fib_rules_ops dn_fib_rules_ops;
+
struct dn_fib_rule
{
- struct hlist_node r_hlist;
- atomic_t r_clntref;
- u32 r_preference;
- unsigned char r_table;
- unsigned char r_action;
- unsigned char r_dst_len;
- unsigned char r_src_len;
- __le16 r_src;
- __le16 r_srcmask;
- __le16 r_dst;
- __le16 r_dstmask;
- __le16 r_srcmap;
- u8 r_flags;
+ struct fib_rule common;
+ unsigned char dst_len;
+ unsigned char src_len;
+ __le16 src;
+ __le16 srcmask;
+ __le16 dst;
+ __le16 dstmask;
+ __le16 srcmap;
+ u8 flags;
#ifdef CONFIG_DECNET_ROUTE_FWMARK
- u32 r_fwmark;
+ u32 fwmark;
+ u32 fwmask;
#endif
- int r_ifindex;
- char r_ifname[IFNAMSIZ];
- int r_dead;
- struct rcu_head rcu;
};
static struct dn_fib_rule default_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_preference = 0x7fff,
- .r_table = RT_TABLE_MAIN,
- .r_action = RTN_UNICAST
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7fff,
+ .table = RT_TABLE_MAIN,
+ .action = FR_ACT_TO_TBL,
+ },
};
-static struct hlist_head dn_fib_rules;
+static LIST_HEAD(dn_fib_rules);
+
-int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct dn_fib_rule *r;
- struct hlist_node *node;
- int err = -ESRCH;
-
- hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
- if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
- rtm->rtm_src_len == r->r_src_len &&
- rtm->rtm_dst_len == r->r_dst_len &&
- (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
- (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
- (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
- (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
- (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-
- err = -EPERM;
- if (r == &default_rule)
- break;
-
- hlist_del_rcu(&r->r_hlist);
- r->r_dead = 1;
- dn_fib_rule_put(r);
- err = 0;
- break;
- }
- }
+ struct fib_lookup_arg arg = {
+ .result = res,
+ };
+ int err;
+
+ err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg);
+ res->r = arg.rule;
return err;
}
-static inline void dn_fib_rule_put_rcu(struct rcu_head *head)
+static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
{
- struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu);
- kfree(r);
-}
+ int err = -EAGAIN;
+ struct dn_fib_table *tbl;
-void dn_fib_rule_put(struct dn_fib_rule *r)
-{
- if (atomic_dec_and_test(&r->r_clntref)) {
- if (r->r_dead)
- call_rcu(&r->rcu, dn_fib_rule_put_rcu);
- else
- printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
+ switch(rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+
+ case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
+ goto errout;
+
+ case FR_ACT_PROHIBIT:
+ err = -EACCES;
+ goto errout;
+
+ case FR_ACT_BLACKHOLE:
+ default:
+ err = -EINVAL;
+ goto errout;
}
+
+ tbl = dn_fib_get_table(rule->table, 0);
+ if (tbl == NULL)
+ goto errout;
+
+ err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+ if (err > 0)
+ err = -EAGAIN;
+errout:
+ return err;
}
+static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+ [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+ [FRA_PRIORITY] = { .type = NLA_U32 },
+ [FRA_SRC] = { .type = NLA_U16 },
+ [FRA_DST] = { .type = NLA_U16 },
+ [FRA_FWMARK] = { .type = NLA_U32 },
+ [FRA_FWMASK] = { .type = NLA_U32 },
+ [FRA_TABLE] = { .type = NLA_U32 },
+};
-int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct dn_fib_rule *r, *new_r, *last = NULL;
- struct hlist_node *node = NULL;
- unsigned char table_id;
-
- if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
- return -EINVAL;
-
- if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
- return -EINVAL;
-
- if (rtm->rtm_type == RTN_NAT)
- return -EINVAL;
-
- table_id = rtm->rtm_table;
- if (table_id == RT_TABLE_UNSPEC) {
- struct dn_fib_table *tb;
- if (rtm->rtm_type == RTN_UNICAST) {
- if ((tb = dn_fib_empty_table()) == NULL)
- return -ENOBUFS;
- table_id = tb->n;
- }
- }
+ struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+ u16 daddr = fl->fld_dst;
+ u16 saddr = fl->fld_src;
+
+ if (((saddr ^ r->src) & r->srcmask) ||
+ ((daddr ^ r->dst) & r->dstmask))
+ return 0;
- new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
- if (!new_r)
- return -ENOMEM;
- memset(new_r, 0, sizeof(*new_r));
-
- if (rta[RTA_SRC-1])
- memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
- if (rta[RTA_DST-1])
- memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
- if (rta[RTA_GATEWAY-1])
- memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
- new_r->r_src_len = rtm->rtm_src_len;
- new_r->r_dst_len = rtm->rtm_dst_len;
- new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
- new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if (rta[RTA_PROTOINFO-1])
- memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+ if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask)
+ return 0;
#endif
- new_r->r_action = rtm->rtm_type;
- new_r->r_flags = rtm->rtm_flags;
- if (rta[RTA_PRIORITY-1])
- memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
- new_r->r_table = table_id;
- if (rta[RTA_IIF-1]) {
- struct net_device *dev;
- rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
- new_r->r_ifindex = -1;
- dev = dev_get_by_name(new_r->r_ifname);
- if (dev) {
- new_r->r_ifindex = dev->ifindex;
- dev_put(dev);
- }
- }
- r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist);
- if (!new_r->r_preference) {
- if (r && r->r_hlist.next != NULL) {
- r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist);
- if (r->r_preference)
- new_r->r_preference = r->r_preference - 1;
+ return 1;
+}
+
+static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ int err = -EINVAL;
+ struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+ if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+ goto errout;
+
+ if (rule->table == RT_TABLE_UNSPEC) {
+ if (rule->action == FR_ACT_TO_TBL) {
+ struct dn_fib_table *table;
+
+ table = dn_fib_empty_table();
+ if (table == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ rule->table = table->n;
}
}
- hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
- if (r->r_preference > new_r->r_preference)
- break;
- last = r;
+ if (tb[FRA_SRC])
+ r->src = nla_get_u16(tb[FRA_SRC]);
+
+ if (tb[FRA_DST])
+ r->dst = nla_get_u16(tb[FRA_DST]);
+
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+ if (tb[FRA_FWMARK]) {
+ r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+ if (r->fwmark)
+ /* compatibility: if the mark value is non-zero all bits
+ * are compared unless a mask is explicitly specified.
+ */
+ r->fwmask = 0xFFFFFFFF;
}
- atomic_inc(&new_r->r_clntref);
- if (last)
- hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist);
- else
- hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist);
- return 0;
-}
+ if (tb[FRA_FWMASK])
+ r->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+ r->src_len = frh->src_len;
+ r->srcmask = dnet_make_mask(r->src_len);
+ r->dst_len = frh->dst_len;
+ r->dstmask = dnet_make_mask(r->dst_len);
+ err = 0;
+errout:
+ return err;
+}
-int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
{
- struct dn_fib_rule *r, *policy;
- struct dn_fib_table *tb;
- __le16 saddr = flp->fld_src;
- __le16 daddr = flp->fld_dst;
- struct hlist_node *node;
- int err;
+ struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+ if (frh->src_len && (r->src_len != frh->src_len))
+ return 0;
- rcu_read_lock();
+ if (frh->dst_len && (r->dst_len != frh->dst_len))
+ return 0;
- hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) {
- if (((saddr^r->r_src) & r->r_srcmask) ||
- ((daddr^r->r_dst) & r->r_dstmask) ||
#ifdef CONFIG_DECNET_ROUTE_FWMARK
- (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
+ if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+ return 0;
+
+ if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+ return 0;
#endif
- (r->r_ifindex && r->r_ifindex != flp->iif))
- continue;
-
- switch(r->r_action) {
- case RTN_UNICAST:
- case RTN_NAT:
- policy = r;
- break;
- case RTN_UNREACHABLE:
- rcu_read_unlock();
- return -ENETUNREACH;
- default:
- case RTN_BLACKHOLE:
- rcu_read_unlock();
- return -EINVAL;
- case RTN_PROHIBIT:
- rcu_read_unlock();
- return -EACCES;
- }
- if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
- continue;
- err = tb->lookup(tb, flp, res);
- if (err == 0) {
- res->r = policy;
- if (policy)
- atomic_inc(&policy->r_clntref);
- rcu_read_unlock();
- return 0;
- }
- if (err < 0 && err != -EAGAIN) {
- rcu_read_unlock();
- return err;
- }
- }
+ if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
+ return 0;
+
+ if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
+ return 0;
- rcu_read_unlock();
- return -ESRCH;
+ return 1;
}
unsigned dnet_addr_type(__le16 addr)
@@ -273,7 +225,7 @@ unsigned dnet_addr_type(__le16 addr)
struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
struct dn_fib_res res;
unsigned ret = RTN_UNICAST;
- struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+ struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
res.r = NULL;
@@ -286,141 +238,79 @@ unsigned dnet_addr_type(__le16 addr)
return ret;
}
-__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags)
+static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
{
- struct dn_fib_rule *r = res->r;
+ struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- if (r->r_action == RTN_NAT) {
- int addrtype = dnet_addr_type(r->r_srcmap);
+ frh->family = AF_DECnet;
+ frh->dst_len = r->dst_len;
+ frh->src_len = r->src_len;
+ frh->tos = 0;
- if (addrtype == RTN_NAT) {
- saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
- *flags |= RTCF_SNAT;
- } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
- saddr = r->r_srcmap;
- *flags |= RTCF_MASQ;
- }
- }
- return saddr;
-}
-
-static void dn_fib_rules_detach(struct net_device *dev)
-{
- struct hlist_node *node;
- struct dn_fib_rule *r;
-
- hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
- if (r->r_ifindex == dev->ifindex)
- r->r_ifindex = -1;
- }
-}
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+ if (r->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+ if (r->fwmask || r->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask);
+#endif
+ if (r->dst_len)
+ NLA_PUT_U16(skb, FRA_DST, r->dst);
+ if (r->src_len)
+ NLA_PUT_U16(skb, FRA_SRC, r->src);
-static void dn_fib_rules_attach(struct net_device *dev)
-{
- struct hlist_node *node;
- struct dn_fib_rule *r;
+ return 0;
- hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
- if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
- r->r_ifindex = dev->ifindex;
- }
+nla_put_failure:
+ return -ENOBUFS;
}
-static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static u32 dn_fib_rule_default_pref(void)
{
- struct net_device *dev = ptr;
-
- switch(event) {
- case NETDEV_UNREGISTER:
- dn_fib_rules_detach(dev);
- dn_fib_sync_down(0, dev, 1);
- case NETDEV_REGISTER:
- dn_fib_rules_attach(dev);
- dn_fib_sync_up(dev);
+ struct list_head *pos;
+ struct fib_rule *rule;
+
+ if (!list_empty(&dn_fib_rules)) {
+ pos = dn_fib_rules.next;
+ if (pos->next != &dn_fib_rules) {
+ rule = list_entry(pos->next, struct fib_rule, list);
+ if (rule->pref)
+ return rule->pref - 1;
+ }
}
- return NOTIFY_DONE;
-}
-
-
-static struct notifier_block dn_fib_rules_notifier = {
- .notifier_call = dn_fib_rules_event,
-};
-
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
- struct netlink_callback *cb, unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
-
-
- nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
- rtm->rtm_family = AF_DECnet;
- rtm->rtm_dst_len = r->r_dst_len;
- rtm->rtm_src_len = r->r_src_len;
- rtm->rtm_tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if (r->r_fwmark)
- RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
- rtm->rtm_table = r->r_table;
- rtm->rtm_protocol = 0;
- rtm->rtm_scope = 0;
- rtm->rtm_type = r->r_action;
- rtm->rtm_flags = r->r_flags;
-
- if (r->r_dst_len)
- RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
- if (r->r_src_len)
- RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
- if (r->r_ifname[0])
- RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
- if (r->r_preference)
- RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
- if (r->r_srcmap)
- RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ return 0;
}
int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
{
- int idx = 0;
- int s_idx = cb->args[0];
- struct dn_fib_rule *r;
- struct hlist_node *node;
-
- rcu_read_lock();
- hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
- if (idx < s_idx)
- continue;
- if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
- break;
- idx++;
- }
- rcu_read_unlock();
- cb->args[0] = idx;
-
- return skb->len;
+ return fib_rules_dump(skb, cb, AF_DECnet);
}
+static struct fib_rules_ops dn_fib_rules_ops = {
+ .family = AF_DECnet,
+ .rule_size = sizeof(struct dn_fib_rule),
+ .action = dn_fib_rule_action,
+ .match = dn_fib_rule_match,
+ .configure = dn_fib_rule_configure,
+ .compare = dn_fib_rule_compare,
+ .fill = dn_fib_rule_fill,
+ .default_pref = dn_fib_rule_default_pref,
+ .nlgroup = RTNLGRP_DECnet_RULE,
+ .policy = dn_fib_rule_policy,
+ .rules_list = &dn_fib_rules,
+ .owner = THIS_MODULE,
+};
+
void __init dn_fib_rules_init(void)
{
- INIT_HLIST_HEAD(&dn_fib_rules);
- hlist_add_head(&default_rule.r_hlist, &dn_fib_rules);
- register_netdevice_notifier(&dn_fib_rules_notifier);
+ list_add_tail(&default_rule.common.list, &dn_fib_rules);
+ fib_rules_register(&dn_fib_rules_ops);
}
void __exit dn_fib_rules_cleanup(void)
{
- unregister_netdevice_notifier(&dn_fib_rules_notifier);
+ fib_rules_unregister(&dn_fib_rules_ops);
}
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 0ebc46af1bdd..317904bb5896 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -12,7 +12,6 @@
* Changes:
*
*/
-#include <linux/config.h>
#include <linux/string.h>
#include <linux/net.h>
#include <linux/socket.h>
@@ -31,6 +30,7 @@
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_route.h>
#include <net/dn_fib.h>
@@ -75,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
#define RT_TABLE_MIN 1
-
+#define DN_FIB_TABLE_HASHSZ 256
+static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
static DEFINE_RWLOCK(dn_fib_tables_lock);
-struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
static kmem_cache_t *dn_hash_kmem __read_mostly;
static int dn_fib_hash_zombies;
@@ -159,12 +159,10 @@ static void dn_rehash_zone(struct dn_zone *dz)
break;
}
- ht = kmalloc(new_divisor*sizeof(struct dn_fib_node*), GFP_KERNEL);
-
+ ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
if (ht == NULL)
return;
- memset(ht, 0, new_divisor*sizeof(struct dn_fib_node *));
write_lock_bh(&dn_fib_tables_lock);
old_ht = dz->dz_hash;
dz->dz_hash = ht;
@@ -185,11 +183,10 @@ static void dn_free_node(struct dn_fib_node *f)
static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
{
int i;
- struct dn_zone *dz = kmalloc(sizeof(struct dn_zone), GFP_KERNEL);
+ struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
if (!dz)
return NULL;
- memset(dz, 0, sizeof(struct dn_zone));
if (z) {
dz->dz_divisor = 16;
dz->dz_hashmask = 0x0F;
@@ -198,14 +195,12 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
dz->dz_hashmask = 0;
}
- dz->dz_hash = kmalloc(dz->dz_divisor*sizeof(struct dn_fib_node *), GFP_KERNEL);
-
+ dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
if (!dz->dz_hash) {
kfree(dz);
return NULL;
}
- memset(dz->dz_hash, 0, dz->dz_divisor*sizeof(struct dn_fib_node*));
dz->dz_order = z;
dz->dz_mask = dnet_make_mask(z);
@@ -269,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
}
static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+ u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
struct dn_fib_info *fi, unsigned int flags)
{
struct rtmsg *rtm;
@@ -283,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
rtm->rtm_src_len = 0;
rtm->rtm_tos = 0;
rtm->rtm_table = tb_id;
+ RTA_PUT_U32(skb, RTA_TABLE, tb_id);
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = scope;
rtm->rtm_type = type;
@@ -332,29 +328,29 @@ rtattr_failure:
}
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
struct sk_buff *skb;
u32 pid = req ? req->pid : 0;
- int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+ int err = -ENOBUFS;
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb)
- return;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
- if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
- f->fn_type, f->fn_scope, &f->fn_key, z,
- DN_FIB_INFO(f), 0) < 0) {
+ err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+ f->fn_type, f->fn_scope, &f->fn_key, z,
+ DN_FIB_INFO(f), 0);
+ if (err < 0) {
kfree_skb(skb);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
- if (nlh->nlmsg_flags & NLM_F_ECHO)
- atomic_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
- if (nlh->nlmsg_flags & NLM_F_ECHO)
- netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+ err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
}
static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
@@ -365,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
{
int i, s_i;
- s_i = cb->args[3];
+ s_i = cb->args[4];
for(i = 0; f; i++, f = f->fn_next) {
if (i < s_i)
continue;
@@ -378,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
f->fn_scope, &f->fn_key, dz->dz_order,
f->fn_info, NLM_F_MULTI) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -393,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
{
int h, s_h;
- s_h = cb->args[2];
+ s_h = cb->args[3];
for(h = 0; h < dz->dz_divisor; h++) {
if (h < s_h)
continue;
if (h > s_h)
- memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
continue;
if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
- cb->args[2] = h;
+ cb->args[3] = h;
return -1;
}
}
- cb->args[2] = h;
+ cb->args[3] = h;
return skb->len;
}
@@ -417,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
struct dn_zone *dz;
struct dn_hash *table = (struct dn_hash *)tb->data;
- s_m = cb->args[1];
+ s_m = cb->args[2];
read_lock(&dn_fib_tables_lock);
for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
if (m < s_m)
continue;
if (m > s_m)
- memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+ memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
- cb->args[1] = m;
+ cb->args[2] = m;
read_unlock(&dn_fib_tables_lock);
return -1;
}
}
read_unlock(&dn_fib_tables_lock);
- cb->args[1] = m;
+ cb->args[2] = m;
return skb->len;
}
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct dn_fib_table *tb;
+ struct hlist_node *node;
+ int dumped = 0;
+
+ if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+ return dn_cache_dump(skb, cb);
+
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+ for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
+ e = 0;
+ hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+ if (e < s_e)
+ goto next;
+ if (dumped)
+ memset(&cb->args[2], 0, sizeof(cb->args) -
+ 2 * sizeof(cb->args[0]));
+ if (tb->dump(tb, skb, cb) < 0)
+ goto out;
+ dumped = 1;
+next:
+ e++;
+ }
+ }
+out:
+ cb->args[1] = e;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
{
struct dn_hash *table = (struct dn_hash *)tb->data;
@@ -745,9 +778,11 @@ out:
}
-struct dn_fib_table *dn_fib_get_table(int n, int create)
+struct dn_fib_table *dn_fib_get_table(u32 n, int create)
{
struct dn_fib_table *t;
+ struct hlist_node *node;
+ unsigned int h;
if (n < RT_TABLE_MIN)
return NULL;
@@ -755,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(int n, int create)
if (n > RT_TABLE_MAX)
return NULL;
- if (dn_fib_tables[n])
- return dn_fib_tables[n];
+ h = n & (DN_FIB_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+ if (t->n == n) {
+ rcu_read_unlock();
+ return t;
+ }
+ }
+ rcu_read_unlock();
if (!create)
return NULL;
@@ -777,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(int n, int create)
t->flush = dn_fib_table_flush;
t->dump = dn_fib_table_dump;
memset(t->data, 0, sizeof(struct dn_hash));
- dn_fib_tables[n] = t;
+ hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
return t;
}
-static void dn_fib_del_tree(int n)
-{
- struct dn_fib_table *t;
-
- write_lock(&dn_fib_tables_lock);
- t = dn_fib_tables[n];
- dn_fib_tables[n] = NULL;
- write_unlock(&dn_fib_tables_lock);
-
- kfree(t);
-}
-
struct dn_fib_table *dn_fib_empty_table(void)
{
- int id;
+ u32 id;
for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
- if (dn_fib_tables[id] == NULL)
+ if (dn_fib_get_table(id, 0) == NULL)
return dn_fib_get_table(id, 1);
return NULL;
}
+void dn_fib_flush(void)
+{
+ int flushed = 0;
+ struct dn_fib_table *tb;
+ struct hlist_node *node;
+ unsigned int h;
+
+ for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+ hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+ flushed += tb->flush(tb);
+ }
+
+ if (flushed)
+ dn_rt_cache_flush(-1);
+}
+
void __init dn_fib_table_init(void)
{
dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
@@ -814,10 +860,17 @@ void __init dn_fib_table_init(void)
void __exit dn_fib_table_cleanup(void)
{
- int i;
-
- for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
- dn_fib_del_tree(i);
+ struct dn_fib_table *t;
+ struct hlist_node *node, *next;
+ unsigned int h;
- return;
+ write_lock(&dn_fib_tables_lock);
+ for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+ hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+ hlist) {
+ hlist_del(&t->hlist);
+ kfree(t);
+ }
+ }
+ write_unlock(&dn_fib_tables_lock);
}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 74133ecd7700..8b99bd33540d 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return;
- if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
/* Eventually we might send routing messages too */
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index bda5920215fd..e246f054f368 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -13,7 +13,6 @@
* Steve Whitehouse - Memory buffer settings, like the tcp ones
*
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/fs.h>
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 868265619dbb..4d66aac13483 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -9,7 +9,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -674,12 +673,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
edev = dev->ec_ptr;
if (edev == NULL) {
/* Magic up a new one. */
- edev = kmalloc(sizeof(struct ec_device), GFP_KERNEL);
+ edev = kzalloc(sizeof(struct ec_device), GFP_KERNEL);
if (edev == NULL) {
err = -ENOMEM;
break;
}
- memset(edev, 0, sizeof(struct ec_device));
dev->ec_ptr = edev;
} else
net2dev_map[edev->net] = NULL;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index c971f14712ec..4bd78c8cfb26 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -51,7 +51,6 @@
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/dst.h>
@@ -65,81 +64,79 @@
__setup("ether=", netdev_boot_setup);
-/*
- * Create the Ethernet MAC header for an arbitrary protocol layer
+/**
+ * eth_header - create the Ethernet header
+ * @skb: buffer to alter
+ * @dev: source device
+ * @type: Ethernet type field
+ * @daddr: destination address (NULL leave destination address)
+ * @saddr: source address (NULL use device source address)
+ * @len: packet length (<= skb->len)
*
- * saddr=NULL means use device source address
- * daddr=NULL means leave destination address (eg unresolved arp)
+ *
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
*/
-
int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
- void *daddr, void *saddr, unsigned len)
+ void *daddr, void *saddr, unsigned len)
{
- struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+ struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
- /*
- * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
- * in here instead. It is up to the 802.2 layer to carry protocol information.
- */
-
- if(type!=ETH_P_802_3)
+ if (type != ETH_P_802_3)
eth->h_proto = htons(type);
else
eth->h_proto = htons(len);
/*
- * Set the source hardware address.
+ * Set the source hardware address.
*/
-
- if(!saddr)
+
+ if (!saddr)
saddr = dev->dev_addr;
- memcpy(eth->h_source,saddr,dev->addr_len);
+ memcpy(eth->h_source, saddr, dev->addr_len);
- if(daddr)
- {
- memcpy(eth->h_dest,daddr,dev->addr_len);
+ if (daddr) {
+ memcpy(eth->h_dest, daddr, dev->addr_len);
return ETH_HLEN;
}
-
+
/*
- * Anyway, the loopback-device should never use this function...
+ * Anyway, the loopback-device should never use this function...
*/
- if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
- {
+ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
memset(eth->h_dest, 0, dev->addr_len);
return ETH_HLEN;
}
-
+
return -ETH_HLEN;
}
-
-/*
- * Rebuild the Ethernet MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on this
- * sk_buff. We now let ARP fill in the other fields.
+/**
+ * eth_rebuild_header- rebuild the Ethernet MAC header.
+ * @skb: socket buffer to update
*
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
+ * This is called after an ARP or IPV6 ndisc it's resolution on this
+ * sk_buff. We now let protocol (ARP) fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
*/
-
int eth_rebuild_header(struct sk_buff *skb)
{
struct ethhdr *eth = (struct ethhdr *)skb->data;
struct net_device *dev = skb->dev;
- switch (eth->h_proto)
- {
+ switch (eth->h_proto) {
#ifdef CONFIG_INET
case __constant_htons(ETH_P_IP):
- return arp_find(eth->h_dest, skb);
-#endif
+ return arp_find(eth->h_dest, skb);
+#endif
default:
printk(KERN_DEBUG
- "%s: unable to resolve type %X addresses.\n",
+ "%s: unable to resolve type %X addresses.\n",
dev->name, (int)eth->h_proto);
-
+
memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
break;
}
@@ -147,62 +144,70 @@ int eth_rebuild_header(struct sk_buff *skb)
return 0;
}
-
-/*
- * Determine the packet's protocol ID. The rule here is that we
- * assume 802.3 if the type field is short enough to be a length.
- * This is normal practice and works for any 'now in use' protocol.
+/**
+ * eth_type_trans - determine the packet's protocol ID.
+ * @skb: received socket data
+ * @dev: receiving network device
+ *
+ * The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
*/
-
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
struct ethhdr *eth;
unsigned char *rawp;
-
+
skb->mac.raw = skb->data;
- skb_pull(skb,ETH_HLEN);
+ skb_pull(skb, ETH_HLEN);
eth = eth_hdr(skb);
-
+
if (is_multicast_ether_addr(eth->h_dest)) {
if (!compare_ether_addr(eth->h_dest, dev->broadcast))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_MULTICAST;
}
-
+
/*
- * This ALLMULTI check should be redundant by 1.4
- * so don't forget to remove it.
+ * This ALLMULTI check should be redundant by 1.4
+ * so don't forget to remove it.
*
- * Seems, you forgot to remove it. All silly devices
- * seems to set IFF_PROMISC.
+ * Seems, you forgot to remove it. All silly devices
+ * seems to set IFF_PROMISC.
*/
-
- else if(1 /*dev->flags&IFF_PROMISC*/) {
+
+ else if (1 /*dev->flags&IFF_PROMISC */ ) {
if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
skb->pkt_type = PACKET_OTHERHOST;
}
-
+
if (ntohs(eth->h_proto) >= 1536)
return eth->h_proto;
-
+
rawp = skb->data;
-
+
/*
- * This is a magic hack to spot IPX packets. Older Novell breaks
- * the protocol design and runs IPX over 802.3 without an 802.2 LLC
- * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
- * won't work for fault tolerant netware but does for the rest.
+ * This is a magic hack to spot IPX packets. Older Novell breaks
+ * the protocol design and runs IPX over 802.3 without an 802.2 LLC
+ * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+ * won't work for fault tolerant netware but does for the rest.
*/
if (*(unsigned short *)rawp == 0xFFFF)
return htons(ETH_P_802_3);
-
+
/*
- * Real 802.2 LLC
+ * Real 802.2 LLC
*/
return htons(ETH_P_802_2);
}
+EXPORT_SYMBOL(eth_type_trans);
+/**
+ * eth_header_parse - extract hardware address from packet
+ * @skb: packet to extract header from
+ * @haddr: destination buffer
+ */
static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
{
struct ethhdr *eth = eth_hdr(skb);
@@ -210,14 +215,20 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
return ETH_ALEN;
}
+/**
+ * eth_header_cache - fill cache entry from neighbour
+ * @neigh: source neighbour
+ * @hh: destination cache entry
+ * Create an Ethernet header template from the neighbour.
+ */
int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
{
- unsigned short type = hh->hh_type;
+ __be16 type = hh->hh_type;
struct ethhdr *eth;
struct net_device *dev = neigh->dev;
- eth = (struct ethhdr*)
- (((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
+ eth = (struct ethhdr *)
+ (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
if (type == __constant_htons(ETH_P_802_3))
return -1;
@@ -229,27 +240,47 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
return 0;
}
-/*
+/**
+ * eth_header_cache_update - update cache entry
+ * @hh: destination cache entry
+ * @dev: network device
+ * @haddr: new hardware address
+ *
* Called by Address Resolution module to notify changes in address.
*/
-
-void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+ unsigned char *haddr)
{
- memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
+ memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
haddr, dev->addr_len);
}
-EXPORT_SYMBOL(eth_type_trans);
-
+/**
+ * eth_mac_addr - set new Ethernet hardware address
+ * @dev: network device
+ * @p: socket address
+ * Change hardware address of device.
+ *
+ * This doesn't change hardware matching, so needs to be overridden
+ * for most real devices.
+ */
static int eth_mac_addr(struct net_device *dev, void *p)
{
- struct sockaddr *addr=p;
+ struct sockaddr *addr = p;
if (netif_running(dev))
return -EBUSY;
- memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
return 0;
}
+/**
+ * eth_change_mtu - set new MTU size
+ * @dev: network device
+ * @new_mtu: new Maximum Transfer Unit
+ *
+ * Allow changing MTU size. Needs to be overridden for devices
+ * supporting jumbo frames.
+ */
static int eth_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
@@ -258,8 +289,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-/*
- * Fill in the fields of the device structure with ethernet-generic values.
+/**
+ * ether_setup - setup Ethernet network device
+ * @dev: network device
+ * Fill in the fields of the device structure with Ethernet-generic values.
*/
void ether_setup(struct net_device *dev)
{
@@ -278,21 +311,21 @@ void ether_setup(struct net_device *dev)
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
- memset(dev->broadcast,0xFF, ETH_ALEN);
+ memset(dev->broadcast, 0xFF, ETH_ALEN);
}
EXPORT_SYMBOL(ether_setup);
/**
- * alloc_etherdev - Allocates and sets up an ethernet device
+ * alloc_etherdev - Allocates and sets up an Ethernet device
* @sizeof_priv: Size of additional driver-private structure to be allocated
- * for this ethernet device
+ * for this Ethernet device
*
- * Fill in the fields of the device structure with ethernet-generic
+ * Fill in the fields of the device structure with Ethernet-generic
* values. Basically does everything except registering the device.
*
* Constructs a new net device, complete with a private data area of
- * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for
* this private data area.
*/
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index dbb08528ddf5..f7e84e9d13ad 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -58,6 +58,7 @@ config IEEE80211_CRYPT_TKIP
depends on IEEE80211 && NET_RADIO
select CRYPTO
select CRYPTO_MICHAEL_MIC
+ select CRC32
---help---
Include software based cipher suites in support of IEEE 802.11i
(aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index cb71d794a7d1..5ed0a98b2d76 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -110,11 +110,10 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops)
unsigned long flags;
struct ieee80211_crypto_alg *alg;
- alg = kmalloc(sizeof(*alg), GFP_KERNEL);
+ alg = kzalloc(sizeof(*alg), GFP_KERNEL);
if (alg == NULL)
return -ENOMEM;
- memset(alg, 0, sizeof(*alg));
alg->ops = ops;
spin_lock_irqsave(&ieee80211_crypto_lock, flags);
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 78b2d13e80e3..35aa3426c3fa 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -9,7 +9,7 @@
* more details.
*/
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -49,7 +49,7 @@ struct ieee80211_ccmp_data {
int key_idx;
- struct crypto_tfm *tfm;
+ struct crypto_cipher *tfm;
/* scratch buffers for virt_to_page() (crypto API) */
u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
@@ -57,36 +57,26 @@ struct ieee80211_ccmp_data {
u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
};
-static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm,
- const u8 pt[16], u8 ct[16])
+static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
+ const u8 pt[16], u8 ct[16])
{
- struct scatterlist src, dst;
-
- src.page = virt_to_page(pt);
- src.offset = offset_in_page(pt);
- src.length = AES_BLOCK_LEN;
-
- dst.page = virt_to_page(ct);
- dst.offset = offset_in_page(ct);
- dst.length = AES_BLOCK_LEN;
-
- crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN);
+ crypto_cipher_encrypt_one(tfm, ct, pt);
}
static void *ieee80211_ccmp_init(int key_idx)
{
struct ieee80211_ccmp_data *priv;
- priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
+ priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
if (priv == NULL)
goto fail;
- memset(priv, 0, sizeof(*priv));
priv->key_idx = key_idx;
- priv->tfm = crypto_alloc_tfm("aes", 0);
- if (priv->tfm == NULL) {
+ priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->tfm)) {
printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
"crypto API aes\n");
+ priv->tfm = NULL;
goto fail;
}
@@ -95,7 +85,7 @@ static void *ieee80211_ccmp_init(int key_idx)
fail:
if (priv) {
if (priv->tfm)
- crypto_free_tfm(priv->tfm);
+ crypto_free_cipher(priv->tfm);
kfree(priv);
}
@@ -106,7 +96,7 @@ static void ieee80211_ccmp_deinit(void *priv)
{
struct ieee80211_ccmp_data *_priv = priv;
if (_priv && _priv->tfm)
- crypto_free_tfm(_priv->tfm);
+ crypto_free_cipher(_priv->tfm);
kfree(priv);
}
@@ -117,7 +107,7 @@ static inline void xor_block(u8 * b, u8 * a, size_t len)
b[i] ^= a[i];
}
-static void ccmp_init_blocks(struct crypto_tfm *tfm,
+static void ccmp_init_blocks(struct crypto_cipher *tfm,
struct ieee80211_hdr_4addr *hdr,
u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
{
@@ -273,6 +263,27 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
return 0;
}
+/*
+ * deal with seq counter wrapping correctly.
+ * refer to timer_after() for jiffies wrapping handling
+ */
+static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o)
+{
+ u32 iv32_n, iv16_n;
+ u32 iv32_o, iv16_o;
+
+ iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3];
+ iv16_n = (pn_n[4] << 8) | pn_n[5];
+
+ iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3];
+ iv16_o = (pn_o[4] << 8) | pn_o[5];
+
+ if ((s32)iv32_n - (s32)iv32_o < 0 ||
+ (iv32_n == iv32_o && iv16_n <= iv16_o))
+ return 1;
+ return 0;
+}
+
static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct ieee80211_ccmp_data *key = priv;
@@ -325,7 +336,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
pn[5] = pos[0];
pos += 8;
- if (memcmp(pn, key->rx_pn, CCMP_PN_LEN) <= 0) {
+ if (ccmp_replay_check(pn, key->rx_pn)) {
if (net_ratelimit()) {
printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT
" previous PN %02x%02x%02x%02x%02x%02x "
@@ -379,7 +390,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
{
struct ieee80211_ccmp_data *data = priv;
int keyidx;
- struct crypto_tfm *tfm = data->tfm;
+ struct crypto_cipher *tfm = data->tfm;
keyidx = data->key_idx;
memset(data, 0, sizeof(*data));
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 3fa5df2e1f0b..4200ec509866 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -9,7 +9,7 @@
* more details.
*/
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -53,8 +53,10 @@ struct ieee80211_tkip_data {
int key_idx;
- struct crypto_tfm *tfm_arc4;
- struct crypto_tfm *tfm_michael;
+ struct crypto_blkcipher *rx_tfm_arc4;
+ struct crypto_hash *rx_tfm_michael;
+ struct crypto_blkcipher *tx_tfm_arc4;
+ struct crypto_hash *tx_tfm_michael;
/* scratch buffers for virt_to_page() (crypto API) */
u8 rx_hdr[16], tx_hdr[16];
@@ -86,17 +88,39 @@ static void *ieee80211_tkip_init(int key_idx)
priv->key_idx = key_idx;
- priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0);
- if (priv->tfm_arc4 == NULL) {
+ priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->tx_tfm_arc4)) {
printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
"crypto API arc4\n");
+ priv->tx_tfm_arc4 = NULL;
goto fail;
}
- priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0);
- if (priv->tfm_michael == NULL) {
+ priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->tx_tfm_michael)) {
printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
"crypto API michael_mic\n");
+ priv->tx_tfm_michael = NULL;
+ goto fail;
+ }
+
+ priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->rx_tfm_arc4)) {
+ printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
+ "crypto API arc4\n");
+ priv->rx_tfm_arc4 = NULL;
+ goto fail;
+ }
+
+ priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->rx_tfm_michael)) {
+ printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
+ "crypto API michael_mic\n");
+ priv->rx_tfm_michael = NULL;
goto fail;
}
@@ -104,10 +128,14 @@ static void *ieee80211_tkip_init(int key_idx)
fail:
if (priv) {
- if (priv->tfm_michael)
- crypto_free_tfm(priv->tfm_michael);
- if (priv->tfm_arc4)
- crypto_free_tfm(priv->tfm_arc4);
+ if (priv->tx_tfm_michael)
+ crypto_free_hash(priv->tx_tfm_michael);
+ if (priv->tx_tfm_arc4)
+ crypto_free_blkcipher(priv->tx_tfm_arc4);
+ if (priv->rx_tfm_michael)
+ crypto_free_hash(priv->rx_tfm_michael);
+ if (priv->rx_tfm_arc4)
+ crypto_free_blkcipher(priv->rx_tfm_arc4);
kfree(priv);
}
@@ -117,10 +145,16 @@ static void *ieee80211_tkip_init(int key_idx)
static void ieee80211_tkip_deinit(void *priv)
{
struct ieee80211_tkip_data *_priv = priv;
- if (_priv && _priv->tfm_michael)
- crypto_free_tfm(_priv->tfm_michael);
- if (_priv && _priv->tfm_arc4)
- crypto_free_tfm(_priv->tfm_arc4);
+ if (_priv) {
+ if (_priv->tx_tfm_michael)
+ crypto_free_hash(_priv->tx_tfm_michael);
+ if (_priv->tx_tfm_arc4)
+ crypto_free_blkcipher(_priv->tx_tfm_arc4);
+ if (_priv->rx_tfm_michael)
+ crypto_free_hash(_priv->rx_tfm_michael);
+ if (_priv->rx_tfm_arc4)
+ crypto_free_blkcipher(_priv->rx_tfm_arc4);
+ }
kfree(priv);
}
@@ -319,6 +353,7 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct ieee80211_tkip_data *tkey = priv;
+ struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 };
int len;
u8 rc4key[16], *pos, *icv;
u32 crc;
@@ -352,18 +387,30 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+ crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
sg.page = virt_to_page(pos);
sg.offset = offset_in_page(pos);
sg.length = len + 4;
- crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4);
+ return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
+}
+/*
+ * deal with seq counter wrapping correctly.
+ * refer to timer_after() for jiffies wrapping handling
+ */
+static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
+ u32 iv32_o, u16 iv16_o)
+{
+ if ((s32)iv32_n - (s32)iv32_o < 0 ||
+ (iv32_n == iv32_o && iv16_n <= iv16_o))
+ return 1;
return 0;
}
static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct ieee80211_tkip_data *tkey = priv;
+ struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 };
u8 rc4key[16];
u8 keyidx, *pos;
u32 iv32;
@@ -415,8 +462,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
pos += 8;
- if (iv32 < tkey->rx_iv32 ||
- (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) {
+ if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
if (net_ratelimit()) {
printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT
" previous TSC %08x%04x received TSC "
@@ -435,11 +481,18 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 12;
- crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+ crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
sg.page = virt_to_page(pos);
sg.offset = offset_in_page(pos);
sg.length = plen + 4;
- crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4);
+ if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
+ if (net_ratelimit()) {
+ printk(KERN_DEBUG ": TKIP: failed to decrypt "
+ "received packet from " MAC_FMT "\n",
+ MAC_ARG(hdr->addr2));
+ }
+ return -7;
+ }
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
@@ -473,12 +526,13 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
return keyidx;
}
-static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr,
+static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
u8 * data, size_t data_len, u8 * mic)
{
+ struct hash_desc desc;
struct scatterlist sg[2];
- if (tkey->tfm_michael == NULL) {
+ if (tfm_michael == NULL) {
printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
return -1;
}
@@ -490,12 +544,12 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr,
sg[1].offset = offset_in_page(data);
sg[1].length = data_len;
- crypto_digest_init(tkey->tfm_michael);
- crypto_digest_setkey(tkey->tfm_michael, key, 8);
- crypto_digest_update(tkey->tfm_michael, sg, 2);
- crypto_digest_final(tkey->tfm_michael, mic);
+ if (crypto_hash_setkey(tfm_michael, key, 8))
+ return -1;
- return 0;
+ desc.tfm = tfm_michael;
+ desc.flags = 0;
+ return crypto_hash_digest(&desc, sg, data_len + 16, mic);
}
static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -529,7 +583,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
if (stype & IEEE80211_STYPE_QOS_DATA) {
const struct ieee80211_hdr_3addrqos *qoshdr =
(struct ieee80211_hdr_3addrqos *)skb->data;
- hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID;
+ hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID);
} else
hdr[12] = 0; /* priority */
@@ -551,7 +605,7 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len,
michael_mic_hdr(skb, tkey->tx_hdr);
pos = skb_put(skb, 8);
- if (michael_mic(tkey, &tkey->key[16], tkey->tx_hdr,
+ if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
return -1;
@@ -589,7 +643,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
return -1;
michael_mic_hdr(skb, tkey->rx_hdr);
- if (michael_mic(tkey, &tkey->key[24], tkey->rx_hdr,
+ if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr,
skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
return -1;
if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
@@ -619,14 +673,18 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
{
struct ieee80211_tkip_data *tkey = priv;
int keyidx;
- struct crypto_tfm *tfm = tkey->tfm_michael;
- struct crypto_tfm *tfm2 = tkey->tfm_arc4;
+ struct crypto_hash *tfm = tkey->tx_tfm_michael;
+ struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4;
+ struct crypto_hash *tfm3 = tkey->rx_tfm_michael;
+ struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4;
keyidx = tkey->key_idx;
memset(tkey, 0, sizeof(*tkey));
tkey->key_idx = keyidx;
- tkey->tfm_michael = tfm;
- tkey->tfm_arc4 = tfm2;
+ tkey->tx_tfm_michael = tfm;
+ tkey->tx_tfm_arc4 = tfm2;
+ tkey->rx_tfm_michael = tfm3;
+ tkey->rx_tfm_arc4 = tfm4;
if (len == TKIP_KEY_LEN) {
memcpy(tkey->key, key, TKIP_KEY_LEN);
tkey->key_set = 1;
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 649e581fa565..1b2efff11d39 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -9,7 +9,7 @@
* more details.
*/
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -33,26 +33,34 @@ struct prism2_wep_data {
u8 key[WEP_KEY_LEN + 1];
u8 key_len;
u8 key_idx;
- struct crypto_tfm *tfm;
+ struct crypto_blkcipher *tx_tfm;
+ struct crypto_blkcipher *rx_tfm;
};
static void *prism2_wep_init(int keyidx)
{
struct prism2_wep_data *priv;
- priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
+ priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
if (priv == NULL)
goto fail;
- memset(priv, 0, sizeof(*priv));
priv->key_idx = keyidx;
- priv->tfm = crypto_alloc_tfm("arc4", 0);
- if (priv->tfm == NULL) {
+ priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->tx_tfm)) {
printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
"crypto API arc4\n");
+ priv->tx_tfm = NULL;
goto fail;
}
+ priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(priv->rx_tfm)) {
+ printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
+ "crypto API arc4\n");
+ priv->rx_tfm = NULL;
+ goto fail;
+ }
/* start WEP IV from a random value */
get_random_bytes(&priv->iv, 4);
@@ -60,8 +68,10 @@ static void *prism2_wep_init(int keyidx)
fail:
if (priv) {
- if (priv->tfm)
- crypto_free_tfm(priv->tfm);
+ if (priv->tx_tfm)
+ crypto_free_blkcipher(priv->tx_tfm);
+ if (priv->rx_tfm)
+ crypto_free_blkcipher(priv->rx_tfm);
kfree(priv);
}
return NULL;
@@ -70,8 +80,12 @@ static void *prism2_wep_init(int keyidx)
static void prism2_wep_deinit(void *priv)
{
struct prism2_wep_data *_priv = priv;
- if (_priv && _priv->tfm)
- crypto_free_tfm(_priv->tfm);
+ if (_priv) {
+ if (_priv->tx_tfm)
+ crypto_free_blkcipher(_priv->tx_tfm);
+ if (_priv->rx_tfm)
+ crypto_free_blkcipher(_priv->rx_tfm);
+ }
kfree(priv);
}
@@ -122,6 +136,7 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len,
static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct prism2_wep_data *wep = priv;
+ struct blkcipher_desc desc = { .tfm = wep->tx_tfm };
u32 crc, klen, len;
u8 *pos, *icv;
struct scatterlist sg;
@@ -153,13 +168,11 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_cipher_setkey(wep->tfm, key, klen);
+ crypto_blkcipher_setkey(wep->tx_tfm, key, klen);
sg.page = virt_to_page(pos);
sg.offset = offset_in_page(pos);
sg.length = len + 4;
- crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4);
-
- return 0;
+ return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
}
/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -172,6 +185,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct prism2_wep_data *wep = priv;
+ struct blkcipher_desc desc = { .tfm = wep->rx_tfm };
u32 crc, klen, plen;
u8 key[WEP_KEY_LEN + 3];
u8 keyidx, *pos, icv[4];
@@ -196,11 +210,12 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
/* Apply RC4 to data and compute CRC32 over decrypted data */
plen = skb->len - hdr_len - 8;
- crypto_cipher_setkey(wep->tfm, key, klen);
+ crypto_blkcipher_setkey(wep->rx_tfm, key, klen);
sg.page = virt_to_page(pos);
sg.offset = offset_in_page(pos);
sg.length = plen + 4;
- crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4);
+ if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
+ return -7;
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c
index 192243ab35ed..305a09de85a5 100644
--- a/net/ieee80211/ieee80211_geo.c
+++ b/net/ieee80211/ieee80211_geo.c
@@ -24,7 +24,6 @@
******************************************************************************/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 2cb84d84f671..13b1e5fff7e4 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -31,7 +31,6 @@
*******************************************************************************/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 2bf567fd5a17..770704183a1b 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -14,7 +14,6 @@
*/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
@@ -369,6 +368,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
/* Put this code here so that we avoid duplicating it in all
* Rx paths. - Jean II */
+#ifdef CONFIG_WIRELESS_EXT
#ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */
/* If spy monitoring on */
if (ieee->spy_data.spy_number > 0) {
@@ -397,15 +397,16 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
wireless_spy_update(ieee->dev, hdr->addr2, &wstats);
}
#endif /* IW_WIRELESS_SPY */
+#endif /* CONFIG_WIRELESS_EXT */
#ifdef NOT_YET
hostap_update_rx_stats(local->ap, hdr, rx_stats);
#endif
if (ieee->iw_mode == IW_MODE_MONITOR) {
- ieee80211_monitor_rx(ieee, skb, rx_stats);
stats->rx_packets++;
stats->rx_bytes += skb->len;
+ ieee80211_monitor_rx(ieee, skb, rx_stats);
return 1;
}
@@ -778,33 +779,44 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
return 0;
}
-/* Filter out unrelated packets, call ieee80211_rx[_mgt] */
-int ieee80211_rx_any(struct ieee80211_device *ieee,
+/* Filter out unrelated packets, call ieee80211_rx[_mgt]
+ * This function takes over the skb, it should not be used again after calling
+ * this function. */
+void ieee80211_rx_any(struct ieee80211_device *ieee,
struct sk_buff *skb, struct ieee80211_rx_stats *stats)
{
struct ieee80211_hdr_4addr *hdr;
int is_packet_for_us;
u16 fc;
- if (ieee->iw_mode == IW_MODE_MONITOR)
- return ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL;
+ if (ieee->iw_mode == IW_MODE_MONITOR) {
+ if (!ieee80211_rx(ieee, skb, stats))
+ dev_kfree_skb_irq(skb);
+ return;
+ }
+
+ if (skb->len < sizeof(struct ieee80211_hdr))
+ goto drop_free;
hdr = (struct ieee80211_hdr_4addr *)skb->data;
fc = le16_to_cpu(hdr->frame_ctl);
if ((fc & IEEE80211_FCTL_VERS) != 0)
- return -EINVAL;
+ goto drop_free;
switch (fc & IEEE80211_FCTL_FTYPE) {
case IEEE80211_FTYPE_MGMT:
+ if (skb->len < sizeof(struct ieee80211_hdr_3addr))
+ goto drop_free;
ieee80211_rx_mgt(ieee, hdr, stats);
- return 0;
+ dev_kfree_skb_irq(skb);
+ return;
case IEEE80211_FTYPE_DATA:
break;
case IEEE80211_FTYPE_CTL:
- return 0;
+ return;
default:
- return -EINVAL;
+ return;
}
is_packet_for_us = 0;
@@ -848,8 +860,14 @@ int ieee80211_rx_any(struct ieee80211_device *ieee,
}
if (is_packet_for_us)
- return (ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL);
- return 0;
+ if (!ieee80211_rx(ieee, skb, stats))
+ dev_kfree_skb_irq(skb);
+ return;
+
+drop_free:
+ dev_kfree_skb_irq(skb);
+ ieee->stats.rx_dropped++;
+ return;
}
#define MGMT_FRAME_FIXED_PART_LENGTH 0x24
@@ -1060,13 +1078,16 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
while (length >= sizeof(*info_element)) {
if (sizeof(*info_element) + info_element->len > length) {
- IEEE80211_DEBUG_MGMT("Info elem: parse failed: "
- "info_element->len + 2 > left : "
- "info_element->len+2=%zd left=%d, id=%d.\n",
- info_element->len +
- sizeof(*info_element),
- length, info_element->id);
- return 1;
+ IEEE80211_ERROR("Info elem: parse failed: "
+ "info_element->len + 2 > left : "
+ "info_element->len+2=%zd left=%d, id=%d.\n",
+ info_element->len +
+ sizeof(*info_element),
+ length, info_element->id);
+ /* We stop processing but don't return an error here
+ * because some misbehaviour APs break this rule. ie.
+ * Orinoco AP1000. */
+ break;
}
switch (info_element->id) {
@@ -1165,6 +1186,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
case MFIE_TYPE_ERP_INFO:
network->erp_value = info_element->data[0];
+ network->flags |= NETWORK_HAS_ERP_VALUE;
IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n",
network->erp_value);
break;
@@ -1728,5 +1750,6 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee,
}
}
+EXPORT_SYMBOL_GPL(ieee80211_rx_any);
EXPORT_SYMBOL(ieee80211_rx_mgt);
EXPORT_SYMBOL(ieee80211_rx);
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 6a5de1b84459..ae254497ba3d 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -24,7 +24,6 @@
******************************************************************************/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
@@ -338,7 +337,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
hdr_len += 2;
skb->priority = ieee80211_classify(skb);
- header.qos_ctl |= skb->priority & IEEE80211_QCTL_TID;
+ header.qos_ctl |= cpu_to_le16(skb->priority & IEEE80211_QCTL_TID);
}
header.frame_ctl = cpu_to_le16(fc);
@@ -533,13 +532,6 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
}
- if (ret == NETDEV_TX_BUSY) {
- printk(KERN_ERR "%s: NETDEV_TX_BUSY returned; "
- "driver should report queue full via "
- "ieee_device->is_queue_full.\n",
- ieee->dev->name);
- }
-
ieee80211_txb_free(txb);
}
@@ -563,10 +555,13 @@ int ieee80211_tx_frame(struct ieee80211_device *ieee,
struct net_device_stats *stats = &ieee->stats;
struct sk_buff *skb_frag;
int priority = -1;
+ int fraglen = total_len;
+ int headroom = ieee->tx_headroom;
+ struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
spin_lock_irqsave(&ieee->lock, flags);
- if (encrypt_mpdu && !ieee->sec.encrypt)
+ if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt))
encrypt_mpdu = 0;
/* If there is no driver handler to take the TXB, dont' bother
@@ -582,20 +577,24 @@ int ieee80211_tx_frame(struct ieee80211_device *ieee,
goto success;
}
- if (encrypt_mpdu)
+ if (encrypt_mpdu) {
frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+ fraglen += crypt->ops->extra_mpdu_prefix_len +
+ crypt->ops->extra_mpdu_postfix_len;
+ headroom += crypt->ops->extra_mpdu_prefix_len;
+ }
/* When we allocate the TXB we allocate enough space for the reserve
* and full fragment bytes (bytes_per_frag doesn't include prefix,
* postfix, header, FCS, etc.) */
- txb = ieee80211_alloc_txb(1, total_len, ieee->tx_headroom, GFP_ATOMIC);
+ txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC);
if (unlikely(!txb)) {
printk(KERN_WARNING "%s: Could not allocate TXB\n",
ieee->dev->name);
goto failed;
}
txb->encrypted = 0;
- txb->payload_size = total_len;
+ txb->payload_size = fraglen;
skb_frag = txb->fragments[0];
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index a78c4f845f66..5cb9cfd35397 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -369,11 +369,10 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
struct ieee80211_crypt_data *new_crypt;
/* take WEP into use */
- new_crypt = kmalloc(sizeof(struct ieee80211_crypt_data),
+ new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
GFP_KERNEL);
if (new_crypt == NULL)
return -ENOMEM;
- memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data));
new_crypt->ops = ieee80211_get_crypto_ops("WEP");
if (!new_crypt->ops) {
request_module("ieee80211_crypt_wep");
@@ -616,13 +615,11 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
ieee80211_crypt_delayed_deinit(ieee, crypt);
- new_crypt = (struct ieee80211_crypt_data *)
- kmalloc(sizeof(*new_crypt), GFP_KERNEL);
+ new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL);
if (new_crypt == NULL) {
ret = -ENOMEM;
goto done;
}
- memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data));
new_crypt->ops = ops;
if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
new_crypt->priv = new_crypt->ops->init(idx);
diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c
index 5e9a90651d04..589f6d2c548a 100644
--- a/net/ieee80211/softmac/ieee80211softmac_assoc.c
+++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c
@@ -47,9 +47,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft
dprintk(KERN_INFO PFX "sent association request!\n");
- /* Change the state to associating */
spin_lock_irqsave(&mac->lock, flags);
- mac->associnfo.associating = 1;
mac->associated = 0; /* just to make sure */
/* Set a timer for timeout */
@@ -63,6 +61,7 @@ void
ieee80211softmac_assoc_timeout(void *d)
{
struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d;
+ struct ieee80211softmac_network *n;
unsigned long flags;
spin_lock_irqsave(&mac->lock, flags);
@@ -75,11 +74,12 @@ ieee80211softmac_assoc_timeout(void *d)
mac->associnfo.associating = 0;
mac->associnfo.bssvalid = 0;
mac->associated = 0;
+
+ n = ieee80211softmac_get_network_by_bssid_locked(mac, mac->associnfo.bssid);
spin_unlock_irqrestore(&mac->lock, flags);
dprintk(KERN_INFO PFX "assoc request timed out!\n");
- /* FIXME: we need to know the network here. that requires a bit of restructuring */
- ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, NULL);
+ ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, n);
}
void
@@ -96,7 +96,7 @@ ieee80211softmac_disassoc(struct ieee80211softmac_device *mac)
mac->associated = 0;
mac->associnfo.bssvalid = 0;
mac->associnfo.associating = 0;
- ieee80211softmac_init_txrates(mac);
+ ieee80211softmac_init_bss(mac);
ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL);
spin_unlock_irqrestore(&mac->lock, flags);
}
@@ -203,6 +203,10 @@ ieee80211softmac_assoc_work(void *d)
if (mac->associated)
ieee80211softmac_send_disassoc_req(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT);
+ spin_lock_irqsave(&mac->lock, flags);
+ mac->associnfo.associating = 1;
+ spin_unlock_irqrestore(&mac->lock, flags);
+
/* try to find the requested network in our list, if we found one already */
if (bssvalid || mac->associnfo.bssfixed)
found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid);
@@ -295,19 +299,32 @@ ieee80211softmac_assoc_work(void *d)
memcpy(mac->associnfo.associate_essid.data, found->essid.data, IW_ESSID_MAX_SIZE + 1);
/* we found a network! authenticate (if necessary) and associate to it. */
- if (!found->authenticated) {
+ if (found->authenticating) {
+ dprintk(KERN_INFO PFX "Already requested authentication, waiting...\n");
+ if(!mac->associnfo.assoc_wait) {
+ mac->associnfo.assoc_wait = 1;
+ ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL);
+ }
+ return;
+ }
+ if (!found->authenticated && !found->authenticating) {
/* This relies on the fact that _auth_req only queues the work,
* otherwise adding the notification would be racy. */
if (!ieee80211softmac_auth_req(mac, found)) {
- dprintk(KERN_INFO PFX "cannot associate without being authenticated, requested authentication\n");
- ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL);
+ if(!mac->associnfo.assoc_wait) {
+ dprintk(KERN_INFO PFX "Cannot associate without being authenticated, requested authentication\n");
+ mac->associnfo.assoc_wait = 1;
+ ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL);
+ }
} else {
printkl(KERN_WARNING PFX "Not authenticated, but requesting authentication failed. Giving up to associate\n");
+ mac->associnfo.assoc_wait = 0;
ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, found);
}
return;
}
/* finally! now we can start associating */
+ mac->associnfo.assoc_wait = 0;
ieee80211softmac_assoc(mac, found);
}
@@ -317,11 +334,19 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac,
struct ieee80211_assoc_response * resp,
struct ieee80211softmac_network *net)
{
+ u16 cap = le16_to_cpu(resp->capability);
+ u8 erp_value = net->erp_value;
+
mac->associnfo.associating = 0;
- mac->associnfo.supported_rates = net->supported_rates;
+ mac->bssinfo.supported_rates = net->supported_rates;
ieee80211softmac_recalc_txrates(mac);
mac->associated = 1;
+
+ mac->associnfo.short_preamble_available =
+ (cap & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0;
+ ieee80211softmac_process_erp(mac, erp_value);
+
if (mac->set_bssid_filter)
mac->set_bssid_filter(mac->dev, net->bssid);
memcpy(mac->ieee->bssid, net->bssid, ETH_ALEN);
@@ -334,9 +359,9 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac,
int
ieee80211softmac_handle_assoc_response(struct net_device * dev,
struct ieee80211_assoc_response * resp,
- struct ieee80211_network * _ieee80211_network_do_not_use)
+ struct ieee80211_network * _ieee80211_network)
{
- /* NOTE: the network parameter has to be ignored by
+ /* NOTE: the network parameter has to be mostly ignored by
* this code because it is the ieee80211's pointer
* to the struct, not ours (we made a copy)
*/
@@ -368,6 +393,11 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev,
/* now that we know it was for us, we can cancel the timeout */
cancel_delayed_work(&mac->associnfo.timeout);
+ /* if the association response included an ERP IE, update our saved
+ * copy */
+ if (_ieee80211_network->flags & NETWORK_HAS_ERP_VALUE)
+ network->erp_value = _ieee80211_network->erp_value;
+
switch (status) {
case 0:
dprintk(KERN_INFO PFX "associated!\n");
diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index 90b8484e509b..4cef39e171d0 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -36,8 +36,9 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac,
struct ieee80211softmac_auth_queue_item *auth;
unsigned long flags;
- if (net->authenticating)
+ if (net->authenticating || net->authenticated)
return 0;
+ net->authenticating = 1;
/* Add the network if it's not already added */
ieee80211softmac_add_network(mac, net);
@@ -92,7 +93,6 @@ ieee80211softmac_auth_queue(void *data)
return;
}
net->authenticated = 0;
- net->authenticating = 1;
/* add a timeout call so we eventually give up waiting for an auth reply */
schedule_delayed_work(&auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT);
auth->retry--;
@@ -116,6 +116,16 @@ ieee80211softmac_auth_queue(void *data)
kfree(auth);
}
+/* Sends a response to an auth challenge (for shared key auth). */
+static void
+ieee80211softmac_auth_challenge_response(void *_aq)
+{
+ struct ieee80211softmac_auth_queue_item *aq = _aq;
+
+ /* Send our response */
+ ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state);
+}
+
/* Handle the auth response from the AP
* This should be registered with ieee80211 as handle_auth
*/
@@ -197,24 +207,30 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE:
/* Check to make sure we have a challenge IE */
data = (u8 *)auth->info_element;
- if(*data++ != MFIE_TYPE_CHALLENGE){
+ if (*data++ != MFIE_TYPE_CHALLENGE) {
printkl(KERN_NOTICE PFX "Shared Key Authentication failed due to a missing challenge.\n");
break;
}
/* Save the challenge */
spin_lock_irqsave(&mac->lock, flags);
net->challenge_len = *data++;
- if(net->challenge_len > WLAN_AUTH_CHALLENGE_LEN)
+ if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN)
net->challenge_len = WLAN_AUTH_CHALLENGE_LEN;
- if(net->challenge != NULL)
+ if (net->challenge != NULL)
kfree(net->challenge);
net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC);
memcpy(net->challenge, data, net->challenge_len);
aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE;
- spin_unlock_irqrestore(&mac->lock, flags);
- /* Send our response */
- ieee80211softmac_send_mgt_frame(mac, aq->net, IEEE80211_STYPE_AUTH, aq->state);
+ /* We reuse the work struct from the auth request here.
+ * It is safe to do so as each one is per-request, and
+ * at this point (dealing with authentication response)
+ * we have obviously already sent the initial auth
+ * request. */
+ cancel_delayed_work(&aq->work);
+ INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq);
+ schedule_work(&aq->work);
+ spin_unlock_irqrestore(&mac->lock, flags);
return 0;
case IEEE80211SOFTMAC_AUTH_SHARED_PASS:
kfree(net->challenge);
diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 09541611e48c..82bfddbf33a2 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -96,8 +96,7 @@ ieee80211softmac_alloc_mgt(u32 size)
if(size > IEEE80211_DATA_LEN)
return NULL;
/* Allocate the frame */
- data = kmalloc(size, GFP_ATOMIC);
- memset(data, 0, size);
+ data = kzalloc(size, GFP_ATOMIC);
return data;
}
@@ -229,6 +228,9 @@ ieee80211softmac_assoc_req(struct ieee80211_assoc_request **pkt,
return 0;
ieee80211softmac_hdr_3addr(mac, &((*pkt)->header), IEEE80211_STYPE_ASSOC_REQ, net->bssid, net->bssid);
+ /* Fill in the capabilities */
+ (*pkt)->capability = ieee80211softmac_capabilities(mac, net);
+
/* Fill in Listen Interval (?) */
(*pkt)->listen_interval = cpu_to_le16(10);
@@ -465,3 +467,17 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac,
kfree(pkt);
return 0;
}
+
+/* Beacon handling */
+int ieee80211softmac_handle_beacon(struct net_device *dev,
+ struct ieee80211_beacon *beacon,
+ struct ieee80211_network *network)
+{
+ struct ieee80211softmac_device *mac = ieee80211_priv(dev);
+
+ if (mac->associated && memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0)
+ ieee80211softmac_process_erp(mac, network->erp_value);
+
+ return 0;
+}
+
diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c
index 4b2e57d12418..addea1cf73ae 100644
--- a/net/ieee80211/softmac/ieee80211softmac_module.c
+++ b/net/ieee80211/softmac/ieee80211softmac_module.c
@@ -44,6 +44,7 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv)
softmac->ieee->handle_assoc_response = ieee80211softmac_handle_assoc_response;
softmac->ieee->handle_reassoc_request = ieee80211softmac_handle_reassoc_req;
softmac->ieee->handle_disassoc = ieee80211softmac_handle_disassoc;
+ softmac->ieee->handle_beacon = ieee80211softmac_handle_beacon;
softmac->scaninfo = NULL;
softmac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
@@ -178,21 +179,14 @@ int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo
return 0;
}
-/* Finds the highest rate which is:
- * 1. Present in ri (optionally a basic rate)
- * 2. Supported by the device
- * 3. Less than or equal to the user-defined rate
- */
-static u8 highest_supported_rate(struct ieee80211softmac_device *mac,
+u8 ieee80211softmac_highest_supported_rate(struct ieee80211softmac_device *mac,
struct ieee80211softmac_ratesinfo *ri, int basic_only)
{
u8 user_rate = mac->txrates.user_rate;
int i;
- if (ri->count == 0) {
- dprintk(KERN_ERR PFX "empty ratesinfo?\n");
+ if (ri->count == 0)
return IEEE80211_CCK_RATE_1MB;
- }
for (i = ri->count - 1; i >= 0; i--) {
u8 rate = ri->rates[i];
@@ -208,36 +202,61 @@ static u8 highest_supported_rate(struct ieee80211softmac_device *mac,
/* If we haven't found a suitable rate by now, just trust the user */
return user_rate;
}
+EXPORT_SYMBOL_GPL(ieee80211softmac_highest_supported_rate);
+
+void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac,
+ u8 erp_value)
+{
+ int use_protection;
+ int short_preamble;
+ u32 changes = 0;
+
+ /* Barker preamble mode */
+ short_preamble = ((erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0
+ && mac->associnfo.short_preamble_available) ? 1 : 0;
+
+ /* Protection needed? */
+ use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
+
+ if (mac->bssinfo.short_preamble != short_preamble) {
+ changes |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE;
+ mac->bssinfo.short_preamble = short_preamble;
+ }
+
+ if (mac->bssinfo.use_protection != use_protection) {
+ changes |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION;
+ mac->bssinfo.use_protection = use_protection;
+ }
+
+ if (mac->bssinfo_change && changes)
+ mac->bssinfo_change(mac->dev, changes);
+}
void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac)
{
struct ieee80211softmac_txrates *txrates = &mac->txrates;
- struct ieee80211softmac_txrates oldrates;
u32 change = 0;
- if (mac->txrates_change)
- oldrates = mac->txrates;
-
change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT;
- txrates->default_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 0);
+ txrates->default_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 0);
change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK;
txrates->default_fallback = lower_rate(mac, txrates->default_rate);
change |= IEEE80211SOFTMAC_TXRATECHG_MCAST;
- txrates->mcast_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 1);
+ txrates->mcast_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 1);
if (mac->txrates_change)
- mac->txrates_change(mac->dev, change, &oldrates);
+ mac->txrates_change(mac->dev, change);
}
-void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac)
+void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac)
{
struct ieee80211_device *ieee = mac->ieee;
u32 change = 0;
struct ieee80211softmac_txrates *txrates = &mac->txrates;
- struct ieee80211softmac_txrates oldrates;
+ struct ieee80211softmac_bss_info *bssinfo = &mac->bssinfo;
/* TODO: We need some kind of state machine to lower the default rates
* if we loose too many packets.
@@ -245,8 +264,6 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac)
/* Change the default txrate to the highest possible value.
* The txrate machine will lower it, if it is too high.
*/
- if (mac->txrates_change)
- oldrates = mac->txrates;
/* FIXME: We don't correctly handle backing down to lower
rates, so 801.11g devices start off at 11M for now. People
can manually change it if they really need to, but 11M is
@@ -272,7 +289,23 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac)
change |= IEEE80211SOFTMAC_TXRATECHG_MGT_MCAST;
if (mac->txrates_change)
- mac->txrates_change(mac->dev, change, &oldrates);
+ mac->txrates_change(mac->dev, change);
+
+ change = 0;
+
+ bssinfo->supported_rates.count = 0;
+ memset(bssinfo->supported_rates.rates, 0,
+ sizeof(bssinfo->supported_rates.rates));
+ change |= IEEE80211SOFTMAC_BSSINFOCHG_RATES;
+
+ bssinfo->short_preamble = 0;
+ change |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE;
+
+ bssinfo->use_protection = 0;
+ change |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION;
+
+ if (mac->bssinfo_change)
+ mac->bssinfo_change(mac->dev, change);
mac->running = 1;
}
@@ -282,7 +315,7 @@ void ieee80211softmac_start(struct net_device *dev)
struct ieee80211softmac_device *mac = ieee80211_priv(dev);
ieee80211softmac_start_check_rates(mac);
- ieee80211softmac_init_txrates(mac);
+ ieee80211softmac_init_bss(mac);
}
EXPORT_SYMBOL_GPL(ieee80211softmac_start);
@@ -335,7 +368,6 @@ u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rat
static void ieee80211softmac_add_txrates_badness(struct ieee80211softmac_device *mac,
int amount)
{
- struct ieee80211softmac_txrates oldrates;
u8 default_rate = mac->txrates.default_rate;
u8 default_fallback = mac->txrates.default_fallback;
u32 changes = 0;
@@ -348,8 +380,6 @@ printk("badness %d\n", mac->txrate_badness);
mac->txrate_badness += amount;
if (mac->txrate_badness <= -1000) {
/* Very small badness. Try a faster bitrate. */
- if (mac->txrates_change)
- memcpy(&oldrates, &mac->txrates, sizeof(oldrates));
default_rate = raise_rate(mac, default_rate);
changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT;
default_fallback = get_fallback_rate(mac, default_rate);
@@ -358,8 +388,6 @@ printk("badness %d\n", mac->txrate_badness);
printk("Bitrate raised to %u\n", default_rate);
} else if (mac->txrate_badness >= 10000) {
/* Very high badness. Try a slower bitrate. */
- if (mac->txrates_change)
- memcpy(&oldrates, &mac->txrates, sizeof(oldrates));
default_rate = lower_rate(mac, default_rate);
changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT;
default_fallback = get_fallback_rate(mac, default_rate);
@@ -372,7 +400,7 @@ printk("Bitrate lowered to %u\n", default_rate);
mac->txrates.default_fallback = default_fallback;
if (changes && mac->txrates_change)
- mac->txrates_change(mac->dev, changes, &oldrates);
+ mac->txrates_change(mac->dev, changes);
}
void ieee80211softmac_fragment_lost(struct net_device *dev,
@@ -416,7 +444,11 @@ ieee80211softmac_create_network(struct ieee80211softmac_device *mac,
memcpy(&softnet->supported_rates.rates[softnet->supported_rates.count], net->rates_ex, net->rates_ex_len);
softnet->supported_rates.count += net->rates_ex_len;
sort(softnet->supported_rates.rates, softnet->supported_rates.count, sizeof(softnet->supported_rates.rates[0]), rate_cmp, NULL);
-
+
+ /* we save the ERP value because it is needed at association time, and
+ * many AP's do not include an ERP IE in the association response. */
+ softnet->erp_value = net->erp_value;
+
softnet->capabilities = net->capability;
return softnet;
}
diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h
index fa1f8e3acfc0..0642e090b8a7 100644
--- a/net/ieee80211/softmac/ieee80211softmac_priv.h
+++ b/net/ieee80211/softmac/ieee80211softmac_priv.h
@@ -116,9 +116,11 @@ ieee80211softmac_get_network_by_essid(struct ieee80211softmac_device *mac,
struct ieee80211softmac_essid *essid);
/* Rates related */
+void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac,
+ u8 erp_value);
int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo *ri, u8 rate);
u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rate, int delta);
-void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac);
+void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac);
void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac);
static inline u8 lower_rate(struct ieee80211softmac_device *mac, u8 rate) {
return ieee80211softmac_lower_rate_delta(mac, rate, 1);
@@ -133,6 +135,9 @@ static inline u8 get_fallback_rate(struct ieee80211softmac_device *mac, u8 rate)
/*** prototypes from _io.c */
int ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac,
void* ptrarg, u32 type, u32 arg);
+int ieee80211softmac_handle_beacon(struct net_device *dev,
+ struct ieee80211_beacon *beacon,
+ struct ieee80211_network *network);
/*** prototypes from _auth.c */
/* do these have to go into the public header? */
@@ -189,6 +194,7 @@ struct ieee80211softmac_network {
authenticated:1,
auth_desynced_once:1;
+ u8 erp_value; /* Saved ERP value */
u16 capabilities; /* Capabilities bitfield */
u8 challenge_len; /* Auth Challenge length */
char *challenge; /* Challenge Text */
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index 0e65ff4e33fc..2aa779d18f38 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -70,15 +70,47 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
char *extra)
{
struct ieee80211softmac_device *sm = ieee80211_priv(net_dev);
+ struct ieee80211softmac_network *n;
+ struct ieee80211softmac_auth_queue_item *authptr;
int length = 0;
unsigned long flags;
-
+
+ /* Check if we're already associating to this or another network
+ * If it's another network, cancel and start over with our new network
+ * If it's our network, ignore the change, we're already doing it!
+ */
+ if((sm->associnfo.associating || sm->associated) &&
+ (data->essid.flags && data->essid.length)) {
+ /* Get the associating network */
+ n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid);
+ if(n && n->essid.len == data->essid.length &&
+ !memcmp(n->essid.data, extra, n->essid.len)) {
+ dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n",
+ MAC_ARG(sm->associnfo.bssid));
+ return 0;
+ } else {
+ dprintk(KERN_INFO PFX "Canceling existing associate request!\n");
+ spin_lock_irqsave(&sm->lock,flags);
+ /* Cancel assoc work */
+ cancel_delayed_work(&sm->associnfo.work);
+ /* We don't have to do this, but it's a little cleaner */
+ list_for_each_entry(authptr, &sm->auth_queue, list)
+ cancel_delayed_work(&authptr->work);
+ sm->associnfo.bssvalid = 0;
+ sm->associnfo.bssfixed = 0;
+ spin_unlock_irqrestore(&sm->lock,flags);
+ flush_scheduled_work();
+ }
+ }
+
+
spin_lock_irqsave(&sm->lock, flags);
-
+
sm->associnfo.static_essid = 0;
+ sm->associnfo.assoc_wait = 0;
- if (data->essid.flags && data->essid.length && extra /*required?*/) {
- length = min(data->essid.length - 1, IW_ESSID_MAX_SIZE);
+ if (data->essid.flags && data->essid.length) {
+ length = min((int)data->essid.length, IW_ESSID_MAX_SIZE);
if (length) {
memcpy(sm->associnfo.req_essid.data, extra, length);
sm->associnfo.static_essid = 1;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index da33393be45f..30af4a4dfcc8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
config IP_MULTIPLE_TABLES
bool "IP: policy routing"
depends on IP_ADVANCED_ROUTER
+ select FIB_RULES
---help---
Normally, a router decides what to do with a received packet based
solely on the packet's final destination address. If you say Y here,
@@ -386,6 +387,7 @@ config INET_ESP
select CRYPTO
select CRYPTO_HMAC
select CRYPTO_MD5
+ select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
---help---
@@ -446,24 +448,22 @@ config INET_TCP_DIAG
depends on INET_DIAG
def_tristate INET_DIAG
-config TCP_CONG_ADVANCED
+menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
Support for selection of various TCP congestion control
modules.
Nearly all users can safely say no here, and a safe default
- selection will be made (BIC-TCP with new Reno as a fallback).
+ selection will be made (CUBIC with new Reno as a fallback).
If unsure, say N.
-# TCP Reno is builtin (required as fallback)
-menu "TCP congestion control"
- depends on TCP_CONG_ADVANCED
+if TCP_CONG_ADVANCED
config TCP_CONG_BIC
tristate "Binary Increase Congestion (BIC) control"
- default y
+ default m
---help---
BIC-TCP is a sender-side only change that ensures a linear RTT
fairness under large windows while offering both scalability and
@@ -477,7 +477,7 @@ config TCP_CONG_BIC
config TCP_CONG_CUBIC
tristate "CUBIC TCP"
- default m
+ default y
---help---
This is version 2.0 of BIC-TCP which uses a cubic growth function
among other techniques.
@@ -572,22 +572,49 @@ config TCP_CONG_VENO
loss packets.
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
-config TCP_CONG_COMPOUND
- tristate "TCP Compound"
- depends on EXPERIMENTAL
- default n
- ---help---
- TCP Compound is a sender-side only change to TCP that uses
- a mixed Reno/Vegas approach to calculate the cwnd.
- For further details look here:
- ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
+choice
+ prompt "Default TCP congestion control"
+ default DEFAULT_CUBIC
+ help
+ Select the TCP congestion control that will be used by default
+ for all connections.
-endmenu
+ config DEFAULT_BIC
+ bool "Bic" if TCP_CONG_BIC=y
-config TCP_CONG_BIC
+ config DEFAULT_CUBIC
+ bool "Cubic" if TCP_CONG_CUBIC=y
+
+ config DEFAULT_HTCP
+ bool "Htcp" if TCP_CONG_HTCP=y
+
+ config DEFAULT_VEGAS
+ bool "Vegas" if TCP_CONG_VEGAS=y
+
+ config DEFAULT_WESTWOOD
+ bool "Westwood" if TCP_CONG_WESTWOOD=y
+
+ config DEFAULT_RENO
+ bool "Reno"
+
+endchoice
+
+endif
+
+config TCP_CONG_CUBIC
tristate
depends on !TCP_CONG_ADVANCED
default y
+config DEFAULT_TCP_CONG
+ string
+ default "bic" if DEFAULT_BIC
+ default "cubic" if DEFAULT_CUBIC
+ default "htcp" if DEFAULT_HTCP
+ default "vegas" if DEFAULT_VEGAS
+ default "westwood" if DEFAULT_WESTWOOD
+ default "reno" if DEFAULT_RENO
+ default "cubic"
+
source "net/ipv4/ipvs/Kconfig"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 38b8039bdd55..f66049e28aeb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
-obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o
+obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 461216b47948..edcf0932ac6d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock)
}
/* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
* Shall we try to damage output packets if routing dev changes?
*/
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
static int inet_sk_reselect_saddr(struct sock *sk)
{
@@ -997,7 +996,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct rtable *rt;
__u32 old_saddr = inet->saddr;
__u32 new_saddr;
- __u32 daddr = inet->daddr;
+ __be32 daddr = inet->daddr;
if (inet->opt && inet->opt->srr)
daddr = inet->opt->faddr;
@@ -1044,7 +1043,7 @@ int inet_sk_rebuild_header(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
- u32 daddr;
+ __be32 daddr;
int err;
/* Route is OK, nothing to do. */
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk)
},
};
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, 0);
}
if (!err)
@@ -1097,7 +1097,41 @@ int inet_sk_rebuild_header(struct sock *sk)
EXPORT_SYMBOL(inet_sk_rebuild_header);
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
+static int inet_gso_send_check(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ struct net_protocol *ops;
+ int proto;
+ int ihl;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+ goto out;
+
+ iph = skb->nh.iph;
+ ihl = iph->ihl * 4;
+ if (ihl < sizeof(*iph))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, ihl)))
+ goto out;
+
+ skb->h.raw = __skb_pull(skb, ihl);
+ iph = skb->nh.iph;
+ proto = iph->protocol & (MAX_INET_PROTOS - 1);
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet_protos[proto]);
+ if (likely(ops && ops->gso_send_check))
+ err = ops->gso_send_check(skb);
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct iphdr *iph;
@@ -1106,7 +1140,15 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
int ihl;
int id;
- if (!pskb_may_pull(skb, sizeof(*iph)))
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_TCPV4 |
+ SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
iph = skb->nh.iph;
@@ -1114,7 +1156,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
if (ihl < sizeof(*iph))
goto out;
- if (!pskb_may_pull(skb, ihl))
+ if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
skb->h.raw = __skb_pull(skb, ihl);
@@ -1125,11 +1167,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
rcu_read_lock();
ops = rcu_dereference(inet_protos[proto]);
- if (ops && ops->gso_segment)
- segs = ops->gso_segment(skb, sg);
+ if (likely(ops && ops->gso_segment))
+ segs = ops->gso_segment(skb, features);
rcu_read_unlock();
- if (IS_ERR(segs))
+ if (!segs || unlikely(IS_ERR(segs)))
goto out;
skb = segs;
@@ -1154,6 +1196,7 @@ static struct net_protocol igmp_protocol = {
static struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
+ .gso_send_check = tcp_v4_gso_send_check,
.gso_segment = tcp_tso_segment,
.no_policy = 1,
};
@@ -1200,6 +1243,7 @@ static int ipv4_proc_init(void);
static struct packet_type ip_packet_type = {
.type = __constant_htons(ETH_P_IP),
.func = ip_rcv,
+ .gso_send_check = inet_gso_send_check,
.gso_segment = inet_gso_segment,
};
@@ -1210,10 +1254,7 @@ static int __init inet_init(void)
struct list_head *r;
int rc = -EINVAL;
- if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
- printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
- goto out;
- }
+ BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
rc = proto_register(&tcp_prot, 1);
if (rc)
@@ -1301,10 +1342,10 @@ static int __init inet_init(void)
rc = 0;
out:
return rc;
-out_unregister_tcp_proto:
- proto_unregister(&tcp_prot);
out_unregister_udp_proto:
proto_unregister(&udp_prot);
+out_unregister_tcp_proto:
+ proto_unregister(&tcp_prot);
goto out;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index c7782230080d..99542977e47e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,4 +1,4 @@
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
switch (*optptr) {
case IPOPT_SEC:
case 0x85: /* Some "Extended Security" crap. */
- case 0x86: /* Another "Commercial Security" crap. */
+ case IPOPT_CIPSO:
case IPOPT_RA:
case 0x80|21: /* RFC1770 */
break;
@@ -98,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(++x->replay.oseq);
xfrm_aevent_doreplay(x);
- ahp->icv(ahp, skb, ah->auth_data);
+ err = ah_mac_digest(ahp, skb, ah->auth_data);
+ if (err)
+ goto error;
+ memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
top_iph->tos = iph->tos;
top_iph->ttl = iph->ttl;
@@ -120,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
{
int ah_hlen;
int ihl;
+ int err = -EINVAL;
struct iphdr *iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
@@ -167,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
skb_push(skb, ihl);
- ahp->icv(ahp, skb, ah->auth_data);
- if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+ err = ah_mac_digest(ahp, skb, ah->auth_data);
+ if (err)
+ goto out;
+ err = -EINVAL;
+ if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
x->stats.integrity_failed++;
goto out;
}
@@ -180,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
return 0;
out:
- return -EINVAL;
+ return err;
}
static void ah4_err(struct sk_buff *skb, u32 info)
@@ -205,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x)
{
struct ah_data *ahp = NULL;
struct xfrm_algo_desc *aalg_desc;
+ struct crypto_hash *tfm;
if (!x->aalg)
goto error;
@@ -216,32 +224,33 @@ static int ah_init_state(struct xfrm_state *x)
if (x->encap)
goto error;
- ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+ ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
if (ahp == NULL)
return -ENOMEM;
- memset(ahp, 0, sizeof(*ahp));
-
ahp->key = x->aalg->alg_key;
ahp->key_len = (x->aalg->alg_key_len+7)/8;
- ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
- if (!ahp->tfm)
+ tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ goto error;
+
+ ahp->tfm = tfm;
+ if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
goto error;
- ahp->icv = ah_hmac_digest;
/*
* Lookup the algorithm description maintained by xfrm_algo,
* verify crypto transform properties, and store information
* we need for AH processing. This lookup cannot fail here
- * after a successful crypto_alloc_tfm().
+ * after a successful crypto_alloc_hash().
*/
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_tfm_alg_digestsize(ahp->tfm)) {
+ crypto_hash_digestsize(tfm)) {
printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
- x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+ x->aalg->alg_name, crypto_hash_digestsize(tfm),
aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
@@ -256,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x)
goto error;
x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
x->data = ahp;
@@ -265,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x)
error:
if (ahp) {
kfree(ahp->work_icv);
- crypto_free_tfm(ahp->tfm);
+ crypto_free_hash(ahp->tfm);
kfree(ahp);
}
return -EINVAL;
@@ -280,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x)
kfree(ahp->work_icv);
ahp->work_icv = NULL;
- crypto_free_tfm(ahp->tfm);
+ crypto_free_hash(ahp->tfm);
ahp->tfm = NULL;
kfree(ahp);
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4749d504c629..cfe5c8474286 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -80,7 +80,6 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
@@ -235,7 +234,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev)
static int arp_constructor(struct neighbour *neigh)
{
- u32 addr = *(u32*)neigh->primary_key;
+ __be32 addr = *(__be32*)neigh->primary_key;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
@@ -331,10 +330,10 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
- u32 saddr = 0;
+ __be32 saddr = 0;
u8 *dst_ha = NULL;
struct net_device *dev = neigh->dev;
- u32 target = *(u32*)neigh->primary_key;
+ __be32 target = *(__be32*)neigh->primary_key;
int probes = atomic_read(&neigh->probes);
struct in_device *in_dev = in_dev_get(dev);
@@ -386,7 +385,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
}
static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
- u32 sip, u32 tip)
+ __be32 sip, __be32 tip)
{
int scope;
@@ -421,7 +420,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
return !inet_confirm_addr(dev, sip, tip, scope);
}
-static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
+static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
.saddr = tip } } };
@@ -450,7 +449,7 @@ static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
* is allowed to use this function, it is scheduled to be removed. --ANK
*/
-static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct net_device * dev)
+static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev)
{
switch (addr_hint) {
case RTN_LOCAL:
@@ -471,7 +470,7 @@ static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, s
int arp_find(unsigned char *haddr, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- u32 paddr;
+ __be32 paddr;
struct neighbour *n;
if (!skb->dst) {
@@ -512,7 +511,7 @@ int arp_bind_neighbour(struct dst_entry *dst)
if (dev == NULL)
return -EINVAL;
if (n == NULL) {
- u32 nexthop = ((struct rtable*)dst)->rt_gateway;
+ __be32 nexthop = ((struct rtable*)dst)->rt_gateway;
if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))
nexthop = 0;
n = __neigh_lookup_errno(
@@ -561,8 +560,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
* Create an arp packet. If (dest_hw == NULL), we create a broadcast
* message.
*/
-struct sk_buff *arp_create(int type, int ptype, u32 dest_ip,
- struct net_device *dev, u32 src_ip,
+struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
unsigned char *dest_hw, unsigned char *src_hw,
unsigned char *target_hw)
{
@@ -676,8 +675,8 @@ void arp_xmit(struct sk_buff *skb)
/*
* Create and send an arp packet.
*/
-void arp_send(int type, int ptype, u32 dest_ip,
- struct net_device *dev, u32 src_ip,
+void arp_send(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
unsigned char *dest_hw, unsigned char *src_hw,
unsigned char *target_hw)
{
@@ -711,7 +710,7 @@ static int arp_process(struct sk_buff *skb)
unsigned char *arp_ptr;
struct rtable *rt;
unsigned char *sha, *tha;
- u32 sip, tip;
+ __be32 sip, tip;
u16 dev_type = dev->type;
int addr_type;
struct neighbour *n;
@@ -970,13 +969,13 @@ out_of_mem:
static int arp_req_set(struct arpreq *r, struct net_device * dev)
{
- u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
int err;
if (r->arp_flags&ATF_PUBL) {
- u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != 0xFFFFFFFF)
+ __be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
+ if (mask && mask != htonl(0xFFFFFFFF))
return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data);
@@ -1064,7 +1063,7 @@ static unsigned arp_state_to_flags(struct neighbour *neigh)
static int arp_req_get(struct arpreq *r, struct net_device *dev)
{
- u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
int err = -ENXIO;
@@ -1085,13 +1084,13 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
static int arp_req_delete(struct arpreq *r, struct net_device * dev)
{
int err;
- u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
if (r->arp_flags & ATF_PUBL) {
- u32 mask =
+ __be32 mask =
((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == 0xFFFFFFFF)
+ if (mask == htonl(0xFFFFFFFF))
return pneigh_delete(&arp_tbl, &ip, dev);
if (mask == 0) {
if (dev == NULL) {
@@ -1373,12 +1372,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
- memset(s, 0, sizeof(*s));
rc = seq_open(file, &arp_seq_ops);
if (rc)
goto out_kfree;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 000000000000..a8e2e879a647
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1474 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory. While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+struct cipso_v4_domhsh_entry {
+ char *domain;
+ u32 valid;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+/* List of available DOI definitions */
+/* XXX - Updates should be minimal so having a single lock for the
+ * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
+ * okay. */
+/* XXX - This currently assumes a minimal number of different DOIs in use,
+ * if in practice there are a lot of different DOIs this list should
+ * probably be turned into a hash table or something similar so we
+ * can do quick lookups. */
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+
+/* Label mapping cache */
+int cipso_v4_cache_enabled = 1;
+int cipso_v4_cache_bucketsize = 10;
+#define CIPSO_V4_CACHE_BUCKETBITS 7
+#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS)
+#define CIPSO_V4_CACHE_REORDERLIMIT 10
+struct cipso_v4_map_cache_bkt {
+ spinlock_t lock;
+ u32 size;
+ struct list_head list;
+};
+struct cipso_v4_map_cache_entry {
+ u32 hash;
+ unsigned char *key;
+ size_t key_len;
+
+ struct netlbl_lsm_cache lsm_data;
+
+ u32 activity;
+ struct list_head list;
+};
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+/* Restricted bitmap (tag #1) flags */
+int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_strictvalid = 1;
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
+ * @bitmap: the bitmap
+ * @bitmap_len: length in bits
+ * @offset: starting offset
+ * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
+ *
+ * Description:
+ * Starting at @offset, walk the bitmap from left to right until either the
+ * desired bit is found or we reach the end. Return the bit offset, -1 if
+ * not found, or -2 if error.
+ */
+static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
+ u32 bitmap_len,
+ u32 offset,
+ u8 state)
+{
+ u32 bit_spot;
+ u32 byte_offset;
+ unsigned char bitmask;
+ unsigned char byte;
+
+ /* gcc always rounds to zero when doing integer division */
+ byte_offset = offset / 8;
+ byte = bitmap[byte_offset];
+ bit_spot = offset;
+ bitmask = 0x80 >> (offset % 8);
+
+ while (bit_spot < bitmap_len) {
+ if ((state && (byte & bitmask) == bitmask) ||
+ (state == 0 && (byte & bitmask) == 0))
+ return bit_spot;
+
+ bit_spot++;
+ bitmask >>= 1;
+ if (bitmask == 0) {
+ byte = bitmap[++byte_offset];
+ bitmask = 0x80;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
+ * @bitmap: the bitmap
+ * @bit: the bit
+ * @state: if non-zero, set the bit (1) else clear the bit (0)
+ *
+ * Description:
+ * Set a single bit in the bitmask. Returns zero on success, negative values
+ * on error.
+ */
+static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
+ u32 bit,
+ u8 state)
+{
+ u32 byte_spot;
+ u8 bitmask;
+
+ /* gcc always rounds to zero when doing integer division */
+ byte_spot = bit / 8;
+ bitmask = 0x80 >> (bit % 8);
+ if (state)
+ bitmap[byte_spot] |= bitmask;
+ else
+ bitmap[byte_spot] &= ~bitmask;
+}
+
+/**
+ * cipso_v4_doi_domhsh_free - Frees a domain list entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a domain list entry can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
+{
+ struct cipso_v4_domhsh_entry *ptr;
+
+ ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
+ kfree(ptr->domain);
+ kfree(ptr);
+}
+
+/**
+ * cipso_v4_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * This function frees the memory associated with a cache entry.
+ *
+ */
+static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
+{
+ if (entry->lsm_data.free)
+ entry->lsm_data.free(entry->lsm_data.data);
+ kfree(entry->key);
+ kfree(entry);
+}
+
+/**
+ * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * The CIPSO tag hashing function. Returns a 32-bit hash value.
+ *
+ */
+static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+ return jhash(key, key_len, 0);
+}
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+/**
+ * cipso_v4_cache_init - Initialize the CIPSO cache
+ *
+ * Description:
+ * Initializes the CIPSO label mapping cache, this function should be called
+ * before any of the other functions defined in this file. Returns zero on
+ * success, negative values on error.
+ *
+ */
+static int cipso_v4_cache_init(void)
+{
+ u32 iter;
+
+ cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
+ sizeof(struct cipso_v4_map_cache_bkt),
+ GFP_KERNEL);
+ if (cipso_v4_cache == NULL)
+ return -ENOMEM;
+
+ for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+ spin_lock_init(&cipso_v4_cache[iter].lock);
+ cipso_v4_cache[iter].size = 0;
+ INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
+ *
+ * Description:
+ * Invalidates and frees any entries in the CIPSO cache. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+void cipso_v4_cache_invalidate(void)
+{
+ struct cipso_v4_map_cache_entry *entry, *tmp_entry;
+ u32 iter;
+
+ for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+ spin_lock_bh(&cipso_v4_cache[iter].lock);
+ list_for_each_entry_safe(entry,
+ tmp_entry,
+ &cipso_v4_cache[iter].list, list) {
+ list_del(&entry->list);
+ cipso_v4_cache_entry_free(entry);
+ }
+ cipso_v4_cache[iter].size = 0;
+ spin_unlock_bh(&cipso_v4_cache[iter].lock);
+ }
+
+ return;
+}
+
+/**
+ * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key. If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes. The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ * 1. The cache entry's activity counter is incremented
+ * 2. The previous (higher ranking) entry's activity counter is decremented
+ * 3. If the difference between the two activity counters is geater than
+ * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ *
+ */
+static int cipso_v4_cache_check(const unsigned char *key,
+ u32 key_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ u32 bkt;
+ struct cipso_v4_map_cache_entry *entry;
+ struct cipso_v4_map_cache_entry *prev_entry = NULL;
+ u32 hash;
+
+ if (!cipso_v4_cache_enabled)
+ return -ENOENT;
+
+ hash = cipso_v4_map_cache_hash(key, key_len);
+ bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ spin_lock_bh(&cipso_v4_cache[bkt].lock);
+ list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
+ if (entry->hash == hash &&
+ entry->key_len == key_len &&
+ memcmp(entry->key, key, key_len) == 0) {
+ entry->activity += 1;
+ secattr->cache.free = entry->lsm_data.free;
+ secattr->cache.data = entry->lsm_data.data;
+ if (prev_entry == NULL) {
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
+ return 0;
+ }
+
+ if (prev_entry->activity > 0)
+ prev_entry->activity -= 1;
+ if (entry->activity > prev_entry->activity &&
+ entry->activity - prev_entry->activity >
+ CIPSO_V4_CACHE_REORDERLIMIT) {
+ __list_del(entry->list.prev, entry->list.next);
+ __list_add(&entry->list,
+ prev_entry->list.prev,
+ &prev_entry->list);
+ }
+
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
+ return 0;
+ }
+ prev_entry = entry;
+ }
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
+
+ return -ENOENT;
+}
+
+/**
+ * cipso_v4_cache_add - Add an entry to the CIPSO cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CIPSO label mapping cache. Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first. It is important to note that there is
+ * currently no checking for duplicate keys. Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int cipso_v4_cache_add(const struct sk_buff *skb,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ u32 bkt;
+ struct cipso_v4_map_cache_entry *entry = NULL;
+ struct cipso_v4_map_cache_entry *old_entry = NULL;
+ unsigned char *cipso_ptr;
+ u32 cipso_ptr_len;
+
+ if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ return 0;
+
+ cipso_ptr = CIPSO_V4_OPTPTR(skb);
+ cipso_ptr_len = cipso_ptr[1];
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (entry == NULL)
+ return -ENOMEM;
+ entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+ if (entry->key == NULL) {
+ ret_val = -ENOMEM;
+ goto cache_add_failure;
+ }
+ memcpy(entry->key, cipso_ptr, cipso_ptr_len);
+ entry->key_len = cipso_ptr_len;
+ entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
+ entry->lsm_data.free = secattr->cache.free;
+ entry->lsm_data.data = secattr->cache.data;
+
+ bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ spin_lock_bh(&cipso_v4_cache[bkt].lock);
+ if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ list_add(&entry->list, &cipso_v4_cache[bkt].list);
+ cipso_v4_cache[bkt].size += 1;
+ } else {
+ old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
+ struct cipso_v4_map_cache_entry, list);
+ list_del(&old_entry->list);
+ list_add(&entry->list, &cipso_v4_cache[bkt].list);
+ cipso_v4_cache_entry_free(old_entry);
+ }
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
+
+ return 0;
+
+cache_add_failure:
+ if (entry)
+ cipso_v4_cache_entry_free(entry);
+ return ret_val;
+}
+
+/*
+ * DOI List Functions
+ */
+
+/**
+ * cipso_v4_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Search the DOI definition list for a DOI definition with a DOI value that
+ * matches @doi. The caller is responsibile for calling rcu_read_[un]lock().
+ * Returns a pointer to the DOI definition on success and NULL on failure.
+ */
+static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
+{
+ struct cipso_v4_doi *iter;
+
+ list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+ if (iter->doi == doi && iter->valid)
+ return iter;
+ return NULL;
+}
+
+/**
+ * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
+ * @doi_def: the DOI structure
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CIPSO engine and calls this
+ * function to add it to the list of acceptable domains. The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see cipso_ipv4.h for details). Returns
+ * zero on success and non-zero on failure.
+ *
+ */
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+ if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+ return -EINVAL;
+
+ doi_def->valid = 1;
+ INIT_RCU_HEAD(&doi_def->rcu);
+ INIT_LIST_HEAD(&doi_def->dom_list);
+
+ rcu_read_lock();
+ if (cipso_v4_doi_search(doi_def->doi) != NULL)
+ goto doi_add_failure_rlock;
+ spin_lock(&cipso_v4_doi_list_lock);
+ if (cipso_v4_doi_search(doi_def->doi) != NULL)
+ goto doi_add_failure_slock;
+ list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return 0;
+
+doi_add_failure_slock:
+ spin_unlock(&cipso_v4_doi_list_lock);
+doi_add_failure_rlock:
+ rcu_read_unlock();
+ return -EEXIST;
+}
+
+/**
+ * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
+ * @doi: the DOI value
+ * @audit_secid: the LSM secid to use in the audit message
+ * @callback: the DOI cleanup/free callback
+ *
+ * Description:
+ * Removes a DOI definition from the CIPSO engine, @callback is called to
+ * free any memory. The NetLabel routines will be called to release their own
+ * LSM domain mappings as well as our own domain list. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_doi_remove(u32 doi,
+ struct netlbl_audit *audit_info,
+ void (*callback) (struct rcu_head * head))
+{
+ struct cipso_v4_doi *doi_def;
+ struct cipso_v4_domhsh_entry *dom_iter;
+
+ rcu_read_lock();
+ if (cipso_v4_doi_search(doi) != NULL) {
+ spin_lock(&cipso_v4_doi_list_lock);
+ doi_def = cipso_v4_doi_search(doi);
+ if (doi_def == NULL) {
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+ doi_def->valid = 0;
+ list_del_rcu(&doi_def->list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
+ if (dom_iter->valid)
+ netlbl_domhsh_remove(dom_iter->domain,
+ audit_info);
+ cipso_v4_cache_invalidate();
+ rcu_read_unlock();
+
+ call_rcu(&doi_def->rcu, callback);
+ return 0;
+ }
+ rcu_read_unlock();
+
+ return -ENOENT;
+}
+
+/**
+ * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Searches for a valid DOI definition and if one is found it is returned to
+ * the caller. Otherwise NULL is returned. The caller must ensure that
+ * rcu_read_lock() is held while accessing the returned definition.
+ *
+ */
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+ return cipso_v4_doi_search(doi);
+}
+
+/**
+ * cipso_v4_doi_walk - Iterate through the DOI definitions
+ * @skip_cnt: skip past this number of DOI definitions, updated
+ * @callback: callback for each DOI definition
+ * @cb_arg: argument for the callback function
+ *
+ * Description:
+ * Iterate over the DOI definition list, skipping the first @skip_cnt entries.
+ * For each entry call @callback, if @callback returns a negative value stop
+ * 'walking' through the list and return. Updates the value in @skip_cnt upon
+ * return. Returns zero on success, negative values on failure.
+ *
+ */
+int cipso_v4_doi_walk(u32 *skip_cnt,
+ int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
+ void *cb_arg)
+{
+ int ret_val = -ENOENT;
+ u32 doi_cnt = 0;
+ struct cipso_v4_doi *iter_doi;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
+ if (iter_doi->valid) {
+ if (doi_cnt++ < *skip_cnt)
+ continue;
+ ret_val = callback(iter_doi, cb_arg);
+ if (ret_val < 0) {
+ doi_cnt--;
+ goto doi_walk_return;
+ }
+ }
+
+doi_walk_return:
+ rcu_read_unlock();
+ *skip_cnt = doi_cnt;
+ return ret_val;
+}
+
+/**
+ * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to add
+ *
+ * Description:
+ * Adds the @domain to the the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel). This function
+ * does allocate memory. Returns zero on success, negative values on failure.
+ *
+ */
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
+{
+ struct cipso_v4_domhsh_entry *iter;
+ struct cipso_v4_domhsh_entry *new_dom;
+
+ new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
+ if (new_dom == NULL)
+ return -ENOMEM;
+ if (domain) {
+ new_dom->domain = kstrdup(domain, GFP_KERNEL);
+ if (new_dom->domain == NULL) {
+ kfree(new_dom);
+ return -ENOMEM;
+ }
+ }
+ new_dom->valid = 1;
+ INIT_RCU_HEAD(&new_dom->rcu);
+
+ rcu_read_lock();
+ spin_lock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+ if (iter->valid &&
+ ((domain != NULL && iter->domain != NULL &&
+ strcmp(iter->domain, domain) == 0) ||
+ (domain == NULL && iter->domain == NULL))) {
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ kfree(new_dom->domain);
+ kfree(new_dom);
+ return -EEXIST;
+ }
+ list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes the @domain from the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel). Returns zero
+ * on success and negative values on error.
+ *
+ */
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+ const char *domain)
+{
+ struct cipso_v4_domhsh_entry *iter;
+
+ rcu_read_lock();
+ spin_lock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+ if (iter->valid &&
+ ((domain != NULL && iter->domain != NULL &&
+ strcmp(iter->domain, domain) == 0) ||
+ (domain == NULL && iter->domain == NULL))) {
+ iter->valid = 0;
+ list_del_rcu(&iter->list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
+
+ return 0;
+ }
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return -ENOENT;
+}
+
+/*
+ * Label Mapping Functions
+ */
+
+/**
+ * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
+ * @doi_def: the DOI definition
+ * @level: the level to check
+ *
+ * Description:
+ * Checks the given level against the given DOI definition and returns a
+ * negative value if the level does not have a valid mapping and a zero value
+ * if the level is defined by the DOI.
+ *
+ */
+static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
+{
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+ return 0;
+ break;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
+ * @doi_def: the DOI definition
+ * @host_lvl: the host MLS level
+ * @net_lvl: the network/CIPSO MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS level to the correct
+ * CIPSO level using the given DOI definition. Returns zero on success,
+ * negative values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
+ u32 host_lvl,
+ u32 *net_lvl)
+{
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ *net_lvl = host_lvl;
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ if (host_lvl < doi_def->map.std->lvl.local_size) {
+ *net_lvl = doi_def->map.std->lvl.local[host_lvl];
+ return 0;
+ }
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
+ * @doi_def: the DOI definition
+ * @net_lvl: the network/CIPSO MLS level
+ * @host_lvl: the host MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO level to the correct local MLS
+ * level using the given DOI definition. Returns zero on success, negative
+ * values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
+ u32 net_lvl,
+ u32 *host_lvl)
+{
+ struct cipso_v4_std_map_tbl *map_tbl;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ *host_lvl = net_lvl;
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ map_tbl = doi_def->map.std;
+ if (net_lvl < map_tbl->lvl.cipso_size &&
+ map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
+ *host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
+ return 0;
+ }
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
+ * @doi_def: the DOI definition
+ * @bitmap: category bitmap
+ * @bitmap_len: bitmap length in bytes
+ *
+ * Description:
+ * Checks the given category bitmap against the given DOI definition and
+ * returns a negative value if any of the categories in the bitmap do not have
+ * a valid mapping and a zero value if all of the categories are valid.
+ *
+ */
+static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
+ const unsigned char *bitmap,
+ u32 bitmap_len)
+{
+ int cat = -1;
+ u32 bitmap_len_bits = bitmap_len * 8;
+ u32 cipso_cat_size = doi_def->map.std->cat.cipso_size;
+ u32 *cipso_array = doi_def->map.std->cat.cipso;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ cat = cipso_v4_bitmap_walk(bitmap,
+ bitmap_len_bits,
+ cat + 1,
+ 1);
+ if (cat < 0)
+ break;
+ if (cat >= cipso_cat_size ||
+ cipso_array[cat] >= CIPSO_V4_INV_CAT)
+ return -EFAULT;
+ }
+
+ if (cat == -1)
+ return 0;
+ break;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @host_cat: the category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ * @net_cat: the zero'd out category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CIPSO bitmap using the given DOI definition. Returns the minimum
+ * size in bytes of the network bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
+ const unsigned char *host_cat,
+ u32 host_cat_len,
+ unsigned char *net_cat,
+ u32 net_cat_len)
+{
+ int host_spot = -1;
+ u32 net_spot;
+ u32 net_spot_max = 0;
+ u32 host_clen_bits = host_cat_len * 8;
+ u32 net_clen_bits = net_cat_len * 8;
+ u32 host_cat_size = doi_def->map.std->cat.local_size;
+ u32 *host_cat_array = doi_def->map.std->cat.local;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ net_spot_max = host_cat_len - 1;
+ while (net_spot_max > 0 && host_cat[net_spot_max] == 0)
+ net_spot_max--;
+ if (net_spot_max > net_cat_len)
+ return -EINVAL;
+ memcpy(net_cat, host_cat, net_spot_max);
+ return net_spot_max;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ host_spot = cipso_v4_bitmap_walk(host_cat,
+ host_clen_bits,
+ host_spot + 1,
+ 1);
+ if (host_spot < 0)
+ break;
+ if (host_spot >= host_cat_size)
+ return -EPERM;
+
+ net_spot = host_cat_array[host_spot];
+ if (net_spot >= net_clen_bits)
+ return -ENOSPC;
+ cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
+
+ if (net_spot > net_spot_max)
+ net_spot_max = net_spot;
+ }
+
+ if (host_spot == -2)
+ return -EFAULT;
+
+ if (++net_spot_max % 8)
+ return net_spot_max / 8 + 1;
+ return net_spot_max / 8;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ * @host_cat: the zero'd out category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO bitmap to the correct local
+ * MLS category bitmap using the given DOI definition. Returns the minimum
+ * size in bytes of the host bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
+ const unsigned char *net_cat,
+ u32 net_cat_len,
+ unsigned char *host_cat,
+ u32 host_cat_len)
+{
+ u32 host_spot;
+ u32 host_spot_max = 0;
+ int net_spot = -1;
+ u32 net_clen_bits = net_cat_len * 8;
+ u32 host_clen_bits = host_cat_len * 8;
+ u32 net_cat_size = doi_def->map.std->cat.cipso_size;
+ u32 *net_cat_array = doi_def->map.std->cat.cipso;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ if (net_cat_len > host_cat_len)
+ return -EINVAL;
+ memcpy(host_cat, net_cat, net_cat_len);
+ return net_cat_len;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ net_spot = cipso_v4_bitmap_walk(net_cat,
+ net_clen_bits,
+ net_spot + 1,
+ 1);
+ if (net_spot < 0)
+ break;
+ if (net_spot >= net_cat_size ||
+ net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
+ return -EPERM;
+
+ host_spot = net_cat_array[net_spot];
+ if (host_spot >= host_clen_bits)
+ return -ENOSPC;
+ cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+
+ if (host_spot > host_spot_max)
+ host_spot_max = host_spot;
+ }
+
+ if (net_spot == -2)
+ return -EFAULT;
+
+ if (++host_spot_max % 8)
+ return host_spot_max / 8 + 1;
+ return host_spot_max / 8;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Protocol Handling Functions
+ */
+
+#define CIPSO_V4_HDR_LEN 6
+
+/**
+ * cipso_v4_gentag_hdr - Generate a CIPSO option header
+ * @doi_def: the DOI definition
+ * @len: the total tag length in bytes
+ * @buf: the CIPSO option buffer
+ *
+ * Description:
+ * Write a CIPSO header into the beginning of @buffer. Return zero on success,
+ * negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+ u32 len,
+ unsigned char *buf)
+{
+ if (CIPSO_V4_HDR_LEN + len > 40)
+ return -ENOSPC;
+
+ buf[0] = IPOPT_CIPSO;
+ buf[1] = CIPSO_V4_HDR_LEN + len;
+ *(u32 *)&buf[2] = htonl(doi_def->doi);
+
+ return 0;
+}
+
+#define CIPSO_V4_TAG1_CAT_LEN 30
+
+/**
+ * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The
+ * actual buffer length may be larger than the indicated size due to
+ * translation between host and network category bitmaps. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char **buffer,
+ u32 *buffer_len)
+{
+ int ret_val = -EPERM;
+ unsigned char *buf = NULL;
+ u32 buf_len;
+ u32 level;
+
+ if (secattr->mls_cat) {
+ buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
+ GFP_ATOMIC);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
+ secattr->mls_cat,
+ secattr->mls_cat_len,
+ &buf[CIPSO_V4_HDR_LEN + 4],
+ CIPSO_V4_TAG1_CAT_LEN);
+ if (ret_val < 0)
+ goto gentag_failure;
+
+ /* This will send packets using the "optimized" format when
+ * possibile as specified in section 3.4.2.6 of the
+ * CIPSO draft. */
+ if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
+ ret_val = 10;
+
+ buf_len = 4 + ret_val;
+ } else {
+ buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
+ if (buf == NULL)
+ return -ENOMEM;
+ buf_len = 4;
+ }
+
+ ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+ if (ret_val != 0)
+ goto gentag_failure;
+
+ ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
+ if (ret_val != 0)
+ goto gentag_failure;
+
+ buf[CIPSO_V4_HDR_LEN] = 0x01;
+ buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
+ buf[CIPSO_V4_HDR_LEN + 3] = level;
+
+ *buffer = buf;
+ *buffer_len = CIPSO_V4_HDR_LEN + buf_len;
+
+ return 0;
+
+gentag_failure:
+ kfree(buf);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr. Return zero on success, negatives values on
+ * failure.
+ *
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+ const unsigned char *tag,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u8 tag_len = tag[1];
+ u32 level;
+
+ ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+ if (ret_val != 0)
+ return ret_val;
+ secattr->mls_lvl = level;
+ secattr->mls_lvl_vld = 1;
+
+ if (tag_len > 4) {
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ secattr->mls_cat_len = tag_len - 4;
+ break;
+ case CIPSO_V4_MAP_STD:
+ secattr->mls_cat_len =
+ doi_def->map.std->cat.local_size;
+ break;
+ }
+ secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+ if (secattr->mls_cat == NULL)
+ return -ENOMEM;
+
+ ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
+ &tag[4],
+ tag_len - 4,
+ secattr->mls_cat,
+ secattr->mls_cat_len);
+ if (ret_val < 0) {
+ kfree(secattr->mls_cat);
+ return ret_val;
+ }
+ secattr->mls_cat_len = ret_val;
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_validate - Validate a CIPSO option
+ * @option: the start of the option, on error it is set to point to the error
+ *
+ * Description:
+ * This routine is called to validate a CIPSO option, it checks all of the
+ * fields to ensure that they are at least valid, see the draft snippet below
+ * for details. If the option is valid then a zero value is returned and
+ * the value of @option is unchanged. If the option is invalid then a
+ * non-zero value is returned and @option is adjusted to point to the
+ * offending portion of the option. From the IETF draft ...
+ *
+ * "If any field within the CIPSO options, such as the DOI identifier, is not
+ * recognized the IP datagram is discarded and an ICMP 'parameter problem'
+ * (type 12) is generated and returned. The ICMP code field is set to 'bad
+ * parameter' (code 0) and the pointer is set to the start of the CIPSO field
+ * that is unrecognized."
+ *
+ */
+int cipso_v4_validate(unsigned char **option)
+{
+ unsigned char *opt = *option;
+ unsigned char *tag;
+ unsigned char opt_iter;
+ unsigned char err_offset = 0;
+ u8 opt_len;
+ u8 tag_len;
+ struct cipso_v4_doi *doi_def = NULL;
+ u32 tag_iter;
+
+ /* caller already checks for length values that are too large */
+ opt_len = opt[1];
+ if (opt_len < 8) {
+ err_offset = 1;
+ goto validate_return;
+ }
+
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+ if (doi_def == NULL) {
+ err_offset = 2;
+ goto validate_return_locked;
+ }
+
+ opt_iter = 6;
+ tag = opt + opt_iter;
+ while (opt_iter < opt_len) {
+ for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
+ if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
+ ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
+ err_offset = opt_iter;
+ goto validate_return_locked;
+ }
+
+ tag_len = tag[1];
+ if (tag_len > (opt_len - opt_iter)) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ switch (tag[0]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ if (tag_len < 4) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ /* We are already going to do all the verification
+ * necessary at the socket layer so from our point of
+ * view it is safe to turn these checks off (and less
+ * work), however, the CIPSO draft says we should do
+ * all the CIPSO validations here but it doesn't
+ * really specify _exactly_ what we need to validate
+ * ... so, just make it a sysctl tunable. */
+ if (cipso_v4_rbm_strictvalid) {
+ if (cipso_v4_map_lvl_valid(doi_def,
+ tag[3]) < 0) {
+ err_offset = opt_iter + 3;
+ goto validate_return_locked;
+ }
+ if (tag_len > 4 &&
+ cipso_v4_map_cat_rbm_valid(doi_def,
+ &tag[4],
+ tag_len - 4) < 0) {
+ err_offset = opt_iter + 4;
+ goto validate_return_locked;
+ }
+ }
+ break;
+ default:
+ err_offset = opt_iter;
+ goto validate_return_locked;
+ }
+
+ tag += tag_len;
+ opt_iter += tag_len;
+ }
+
+validate_return_locked:
+ rcu_read_unlock();
+validate_return:
+ *option = opt + err_offset;
+ return err_offset;
+}
+
+/**
+ * cipso_v4_error - Send the correct reponse for a bad packet
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: CIPSO gateway flag
+ *
+ * Description:
+ * Based on the error code given in @error, send an ICMP error message back to
+ * the originating host. From the IETF draft ...
+ *
+ * "If the contents of the CIPSO [option] are valid but the security label is
+ * outside of the configured host or port label range, the datagram is
+ * discarded and an ICMP 'destination unreachable' (type 3) is generated and
+ * returned. The code field of the ICMP is set to 'communication with
+ * destination network administratively prohibited' (code 9) or to
+ * 'communication with destination host administratively prohibited'
+ * (code 10). The value of the code is dependent on whether the originator
+ * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The
+ * recipient of the ICMP message MUST be able to handle either value. The
+ * same procedure is performed if a CIPSO [option] can not be added to an
+ * IP packet because it is too large to fit in the IP options area."
+ *
+ * "If the error is triggered by receipt of an ICMP message, the message is
+ * discarded and no response is permitted (consistent with general ICMP
+ * processing rules)."
+ *
+ */
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
+{
+ if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+ return;
+
+ if (gateway)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+ else
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+}
+
+/**
+ * cipso_v4_socket_setattr - Add a CIPSO option to a socket
+ * @sock: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function. This function requires
+ * exclusive access to @sock->sk, which means it either needs to be in the
+ * process of being created or locked via lock_sock(sock->sk). Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_setattr(const struct socket *sock,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ u32 iter;
+ unsigned char *buf = NULL;
+ u32 buf_len = 0;
+ u32 opt_len;
+ struct ip_options *opt = NULL;
+ struct sock *sk;
+ struct inet_sock *sk_inet;
+ struct inet_connection_sock *sk_conn;
+
+ /* In the case of sock_create_lite(), the sock->sk field is not
+ * defined yet but it is not a problem as the only users of these
+ * "lite" PF_INET sockets are functions which do an accept() call
+ * afterwards so we will label the socket as part of the accept(). */
+ sk = sock->sk;
+ if (sk == NULL)
+ return 0;
+
+ /* XXX - This code assumes only one tag per CIPSO option which isn't
+ * really a good assumption to make but since we only support the MAC
+ * tags right now it is a safe assumption. */
+ iter = 0;
+ do {
+ switch (doi_def->tags[iter]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_gentag_rbm(doi_def,
+ secattr,
+ &buf,
+ &buf_len);
+ break;
+ default:
+ ret_val = -EPERM;
+ goto socket_setattr_failure;
+ }
+
+ iter++;
+ } while (ret_val != 0 &&
+ iter < CIPSO_V4_TAG_MAXCNT &&
+ doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
+ if (ret_val != 0)
+ goto socket_setattr_failure;
+
+ /* We can't use ip_options_get() directly because it makes a call to
+ * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
+ * we can't block here. */
+ opt_len = (buf_len + 3) & ~3;
+ opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
+ if (opt == NULL) {
+ ret_val = -ENOMEM;
+ goto socket_setattr_failure;
+ }
+ memcpy(opt->__data, buf, buf_len);
+ opt->optlen = opt_len;
+ opt->is_data = 1;
+ kfree(buf);
+ buf = NULL;
+ ret_val = ip_options_compile(opt, NULL);
+ if (ret_val != 0)
+ goto socket_setattr_failure;
+
+ sk_inet = inet_sk(sk);
+ if (sk_inet->is_icsk) {
+ sk_conn = inet_csk(sk);
+ if (sk_inet->opt)
+ sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
+ sk_conn->icsk_ext_hdr_len += opt->optlen;
+ sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+ }
+ opt = xchg(&sk_inet->opt, opt);
+ kfree(opt);
+
+ return 0;
+
+socket_setattr_failure:
+ kfree(buf);
+ kfree(opt);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_sock_getattr - Get the security attributes from a sock
+ * @sk: the sock
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sk to see if there is a CIPSO option attached to the sock and if
+ * there is return the CIPSO security attributes in @secattr. This function
+ * requires that @sk be locked, or privately held, but it does not do any
+ * locking itself. Returns zero on success and negative values on failure.
+ *
+ */
+int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOMSG;
+ struct inet_sock *sk_inet;
+ unsigned char *cipso_ptr;
+ u32 doi;
+ struct cipso_v4_doi *doi_def;
+
+ sk_inet = inet_sk(sk);
+ if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
+ return -ENOMSG;
+ cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
+ sizeof(struct iphdr);
+ ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
+ if (ret_val == 0)
+ return ret_val;
+
+ doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL) {
+ rcu_read_unlock();
+ return -ENOMSG;
+ }
+ switch (cipso_ptr[6]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_parsetag_rbm(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ lock_sock(sock->sk);
+ ret_val = cipso_v4_sock_getattr(sock->sk, secattr);
+ release_sock(sock->sk);
+
+ return ret_val;
+}
+
+/**
+ * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse the given packet's CIPSO option and return the security attributes.
+ * Returns zero on success and negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOMSG;
+ unsigned char *cipso_ptr;
+ u32 doi;
+ struct cipso_v4_doi *doi_def;
+
+ if (!CIPSO_V4_OPTEXIST(skb))
+ return -ENOMSG;
+ cipso_ptr = CIPSO_V4_OPTPTR(skb);
+ if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
+ return 0;
+
+ doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL)
+ goto skbuff_getattr_return;
+ switch (cipso_ptr[6]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_parsetag_rbm(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
+ }
+
+skbuff_getattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * cipso_v4_init - Initialize the CIPSO module
+ *
+ * Description:
+ * Initialize the CIPSO module and prepare it for use. Returns zero on success
+ * and negative values on failure.
+ *
+ */
+static int __init cipso_v4_init(void)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_cache_init();
+ if (ret_val != 0)
+ panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
+ ret_val);
+
+ return 0;
+}
+
+subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index c1b42b5257f8..7b068a891953 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -11,7 +11,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/ip.h>
@@ -26,7 +25,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
struct rtable *rt;
- u32 saddr;
+ __be32 saddr;
int oif;
int err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 54419b27686f..7602c79a389b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -27,7 +27,6 @@
* if no match found.
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -44,6 +43,7 @@
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -63,6 +63,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
+#include <net/netlink.h>
struct ipv4_devconf ipv4_devconf = {
.accept_redirects = 1,
@@ -79,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
.accept_source_route = 1,
};
-static void rtmsg_ifa(int event, struct in_ifaddr *);
+static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+ [IFA_LOCAL] = { .type = NLA_U32 },
+ [IFA_ADDRESS] = { .type = NLA_U32 },
+ [IFA_BROADCAST] = { .type = NLA_U32 },
+ [IFA_ANYCAST] = { .type = NLA_U32 },
+ [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+};
+
+static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -94,10 +103,9 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
static struct in_ifaddr *inet_alloc_ifa(void)
{
- struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+ struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
if (ifa) {
- memset(ifa, 0, sizeof(*ifa));
INIT_RCU_HEAD(&ifa->rcu_head);
}
@@ -141,10 +149,9 @@ struct in_device *inetdev_init(struct net_device *dev)
ASSERT_RTNL();
- in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
+ in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
if (!in_dev)
goto out;
- memset(in_dev, 0, sizeof(*in_dev));
INIT_RCU_HEAD(&in_dev->rcu_head);
memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
@@ -217,7 +224,7 @@ static void inetdev_destroy(struct in_device *in_dev)
call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
-int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
+int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
rcu_read_lock();
for_primary_ifa(in_dev) {
@@ -232,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
return 0;
}
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy)
+static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy, struct nlmsghdr *nlh, u32 pid)
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -266,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
if (!do_promote) {
*ifap1 = ifa->ifa_next;
- rtmsg_ifa(RTM_DELADDR, ifa);
+ rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_DOWN, ifa);
inet_free_ifa(ifa);
@@ -291,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
is valid, it will try to restore deleted routes... Grr.
So that, this order is correct.
*/
- rtmsg_ifa(RTM_DELADDR, ifa1);
+ rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
@@ -303,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
- rtmsg_ifa(RTM_NEWADDR, promote);
+ rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -322,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
}
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy)
+{
+ __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
+}
+
+static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
+ u32 pid)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -367,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
listeners of netlink will know about new ifaddr */
- rtmsg_ifa(RTM_NEWADDR, ifa);
+ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
return 0;
}
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+ return __inet_insert_ifa(ifa, NULL, 0);
+}
+
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -410,8 +429,8 @@ struct in_device *inetdev_by_index(int ifindex)
/* Called only from RTNL semaphored context. No locks. */
-struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
- u32 mask)
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
+ __be32 mask)
{
ASSERT_RTNL();
@@ -424,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
struct in_device *in_dev;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
struct in_ifaddr *ifa, **ifap;
+ int err = -EINVAL;
ASSERT_RTNL();
- if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
- goto out;
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ ifm = nlmsg_data(nlh);
+ in_dev = inetdev_by_index(ifm->ifa_index);
+ if (in_dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+ }
+
__in_dev_put(in_dev);
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if ((rta[IFA_LOCAL - 1] &&
- memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
- &ifa->ifa_local, 4)) ||
- (rta[IFA_LABEL - 1] &&
- rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
- (rta[IFA_ADDRESS - 1] &&
- (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
- !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
- ifa))))
+ if (tb[IFA_LOCAL] &&
+ ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
+ continue;
+
+ if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+ continue;
+
+ if (tb[IFA_ADDRESS] &&
+ (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+ !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
continue;
- inet_del_ifa(in_dev, ifap, 1);
+
+ __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
return 0;
}
-out:
- return -EADDRNOTAVAIL;
+
+ err = -EADDRNOTAVAIL;
+errout:
+ return err;
}
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in_ifaddr *ifa;
+ struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
- struct in_ifaddr *ifa;
- int rc = -EINVAL;
+ int err = -EINVAL;
- ASSERT_RTNL();
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ if (err < 0)
+ goto errout;
- if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
- goto out;
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+ goto errout;
- rc = -ENODEV;
- if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
- goto out;
+ dev = __dev_get_by_index(ifm->ifa_index);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+ }
- rc = -ENOBUFS;
- if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev == NULL) {
in_dev = inetdev_init(dev);
- if (!in_dev)
- goto out;
+ if (in_dev == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
}
- if ((ifa = inet_alloc_ifa()) == NULL)
- goto out;
+ ifa = inet_alloc_ifa();
+ if (ifa == NULL) {
+ /*
+ * A potential indev allocation can be left alive, it stays
+ * assigned to its device and is destroy with it.
+ */
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ in_dev_hold(in_dev);
+
+ if (tb[IFA_ADDRESS] == NULL)
+ tb[IFA_ADDRESS] = tb[IFA_LOCAL];
- if (!rta[IFA_ADDRESS - 1])
- rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
- memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
- memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
- if (rta[IFA_BROADCAST - 1])
- memcpy(&ifa->ifa_broadcast,
- RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
- if (rta[IFA_ANYCAST - 1])
- memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
ifa->ifa_flags = ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
- if (rta[IFA_LABEL - 1])
- rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
+ ifa->ifa_dev = in_dev;
+
+ ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
+ ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
+
+ if (tb[IFA_BROADCAST])
+ ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
+
+ if (tb[IFA_ANYCAST])
+ ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
+
+ if (tb[IFA_LABEL])
+ nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- rc = inet_insert_ifa(ifa);
-out:
- return rc;
+ return ifa;
+
+errout:
+ return ERR_PTR(err);
+}
+
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct in_ifaddr *ifa;
+
+ ASSERT_RTNL();
+
+ ifa = rtm_to_ifaddr(nlh);
+ if (IS_ERR(ifa))
+ return PTR_ERR(ifa);
+
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
}
/*
@@ -739,7 +805,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
break;
ret = 0;
if (ifa->ifa_mask != sin->sin_addr.s_addr) {
- u32 old_mask = ifa->ifa_mask;
+ __be32 old_mask = ifa->ifa_mask;
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_mask = sin->sin_addr.s_addr;
ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -810,9 +876,9 @@ out:
return done;
}
-u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
+__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
- u32 addr = 0;
+ __be32 addr = 0;
struct in_device *in_dev;
rcu_read_lock();
@@ -861,11 +927,11 @@ out:
return addr;
}
-static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
- u32 local, int scope)
+static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
+ __be32 local, int scope)
{
int same = 0;
- u32 addr = 0;
+ __be32 addr = 0;
for_ifa(in_dev) {
if (!addr &&
@@ -905,9 +971,9 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
* - local: address, 0=autoselect the local address
* - scope: maximum allowed scope value for the local address
*/
-u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope)
+__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
{
- u32 addr = 0;
+ __be32 addr = 0;
struct in_device *in_dev;
if (dev) {
@@ -1059,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+
if (ifa->ifa_address)
- RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+ NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
+
if (ifa->ifa_local)
- RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+ NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
+
if (ifa->ifa_broadcast)
- RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+ NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
+
if (ifa->ifa_anycast)
- RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+ NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
+
if (ifa->ifa_label[0])
- RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1130,19 +1201,27 @@ done:
return skb->len;
}
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+ u32 pid)
{
- int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
- struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+ struct sk_buff *skb;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
- if (!skb)
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
- else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+ err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
- } else {
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
+ goto errout;
}
+
+ err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
}
static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1154,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
[RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
.dumpit = inet_dump_fib, },
#ifdef CONFIG_IP_MULTIPLE_TABLES
- [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, },
- [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, },
- [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, },
+ [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
#endif
};
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9bbdd4494551..13b29360d102 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,4 +1,4 @@
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -17,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
int err;
struct iphdr *top_iph;
struct ip_esp_hdr *esph;
- struct crypto_tfm *tfm;
+ struct crypto_blkcipher *tfm;
+ struct blkcipher_desc desc;
struct esp_data *esp;
struct sk_buff *trailer;
int blksize;
@@ -37,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esp = x->data;
alen = esp->auth.icv_trunc_len;
tfm = esp->conf.tfm;
- blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+ desc.tfm = tfm;
+ desc.flags = 0;
+ blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
clen = ALIGN(clen + 2, blksize);
if (esp->conf.padlen)
clen = ALIGN(clen, esp->conf.padlen);
@@ -92,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esph->seq_no = htonl(++x->replay.oseq);
xfrm_aevent_doreplay(x);
- if (esp->conf.ivlen)
- crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+ if (esp->conf.ivlen) {
+ if (unlikely(!esp->conf.ivinitted)) {
+ get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 1;
+ }
+ crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+ }
do {
struct scatterlist *sg = &esp->sgbuf[0];
@@ -104,26 +112,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
goto error;
}
skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
- crypto_cipher_encrypt(tfm, sg, sg, clen);
+ err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
} while (0);
+ if (unlikely(err))
+ goto error;
+
if (esp->conf.ivlen) {
- memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
- crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+ memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+ crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
}
if (esp->auth.icv_full_len) {
- esp->auth.icv(esp, skb, (u8*)esph-skb->data,
- sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
- pskb_put(skb, trailer, alen);
+ err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+ sizeof(*esph) + esp->conf.ivlen + clen);
+ memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
}
ip_send_check(top_iph);
- err = 0;
-
error:
return err;
}
@@ -138,8 +147,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph;
struct ip_esp_hdr *esph;
struct esp_data *esp = x->data;
+ struct crypto_blkcipher *tfm = esp->conf.tfm;
+ struct blkcipher_desc desc = { .tfm = tfm };
struct sk_buff *trailer;
- int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
int nfrags;
@@ -147,6 +158,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
u8 nexthdr[2];
struct scatterlist *sg;
int padlen;
+ int err;
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
goto out;
@@ -156,15 +168,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
/* If integrity check is required, do this. */
if (esp->auth.icv_full_len) {
- u8 sum[esp->auth.icv_full_len];
- u8 sum1[alen];
-
- esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+ u8 sum[alen];
+
+ err = esp_mac_digest(esp, skb, 0, skb->len - alen);
+ if (err)
+ goto out;
- if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+ if (skb_copy_bits(skb, skb->len - alen, sum, alen))
BUG();
- if (unlikely(memcmp(sum, sum1, alen))) {
+ if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
x->stats.integrity_failed++;
goto out;
}
@@ -179,7 +192,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
/* Get ivec. This can be wrong, check against another impls. */
if (esp->conf.ivlen)
- crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+ crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
sg = &esp->sgbuf[0];
@@ -189,9 +202,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
}
skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
- crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+ err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
+ if (unlikely(err))
+ return err;
if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
BUG();
@@ -238,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
* as per draft-ietf-ipsec-udp-encaps-06,
* section 3.1.2
*/
- if (!x->props.mode)
+ if (x->props.mode == XFRM_MODE_TRANSPORT)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -255,9 +270,9 @@ out:
static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
@@ -294,11 +309,11 @@ static void esp_destroy(struct xfrm_state *x)
if (!esp)
return;
- crypto_free_tfm(esp->conf.tfm);
+ crypto_free_blkcipher(esp->conf.tfm);
esp->conf.tfm = NULL;
kfree(esp->conf.ivec);
esp->conf.ivec = NULL;
- crypto_free_tfm(esp->auth.tfm);
+ crypto_free_hash(esp->auth.tfm);
esp->auth.tfm = NULL;
kfree(esp->auth.work_icv);
esp->auth.work_icv = NULL;
@@ -308,6 +323,7 @@ static void esp_destroy(struct xfrm_state *x)
static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
+ struct crypto_blkcipher *tfm;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -317,30 +333,33 @@ static int esp_init_state(struct xfrm_state *x)
if (x->ealg == NULL)
goto error;
- esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+ esp = kzalloc(sizeof(*esp), GFP_KERNEL);
if (esp == NULL)
return -ENOMEM;
- memset(esp, 0, sizeof(*esp));
-
if (x->aalg) {
struct xfrm_algo_desc *aalg_desc;
+ struct crypto_hash *hash;
esp->auth.key = x->aalg->alg_key;
esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
- esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
- if (esp->auth.tfm == NULL)
+ hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hash))
+ goto error;
+
+ esp->auth.tfm = hash;
+ if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
goto error;
- esp->auth.icv = esp_hmac_digest;
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+ crypto_hash_digestsize(hash)) {
NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
x->aalg->alg_name,
- crypto_tfm_alg_digestsize(esp->auth.tfm),
+ crypto_hash_digestsize(hash),
aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
@@ -354,24 +373,22 @@ static int esp_init_state(struct xfrm_state *x)
}
esp->conf.key = x->ealg->alg_key;
esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
- if (x->props.ealgo == SADB_EALG_NULL)
- esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
- else
- esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
- if (esp->conf.tfm == NULL)
+ tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
goto error;
- esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+ esp->conf.tfm = tfm;
+ esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
esp->conf.padlen = 0;
if (esp->conf.ivlen) {
esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
if (unlikely(esp->conf.ivec == NULL))
goto error;
- get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 0;
}
- if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+ if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
goto error;
x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 31387abf53a2..9c399a70dd5d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -15,7 +15,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -33,10 +32,12 @@
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
+#include <linux/list.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -51,48 +52,67 @@
#ifndef CONFIG_IP_MULTIPLE_TABLES
-#define RT_TABLE_MIN RT_TABLE_MAIN
-
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;
-#else
+#define FIB_TABLE_HASHSZ 1
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-#define RT_TABLE_MIN 1
+#else
-struct fib_table *fib_tables[RT_TABLE_MAX+1];
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-struct fib_table *__fib_new_table(int id)
+struct fib_table *fib_new_table(u32 id)
{
struct fib_table *tb;
+ unsigned int h;
+ if (id == 0)
+ id = RT_TABLE_MAIN;
+ tb = fib_get_table(id);
+ if (tb)
+ return tb;
tb = fib_hash_init(id);
if (!tb)
return NULL;
- fib_tables[id] = tb;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
return tb;
}
+struct fib_table *fib_get_table(u32 id)
+{
+ struct fib_table *tb;
+ struct hlist_node *node;
+ unsigned int h;
+ if (id == 0)
+ id = RT_TABLE_MAIN;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+ if (tb->tb_id == id) {
+ rcu_read_unlock();
+ return tb;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
-
static void fib_flush(void)
{
int flushed = 0;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_table *tb;
- int id;
+ struct hlist_node *node;
+ unsigned int h;
- for (id = RT_TABLE_MAX; id>0; id--) {
- if ((tb = fib_get_table(id))==NULL)
- continue;
- flushed += tb->tb_flush(tb);
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+ flushed += tb->tb_flush(tb);
}
-#else /* CONFIG_IP_MULTIPLE_TABLES */
- flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
- flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
-#endif /* CONFIG_IP_MULTIPLE_TABLES */
if (flushed)
rt_cache_flush(-1);
@@ -102,7 +122,7 @@ static void fib_flush(void)
* Find the first device with a given source address.
*/
-struct net_device * ip_dev_find(u32 addr)
+struct net_device * ip_dev_find(__be32 addr)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
struct fib_result res;
@@ -126,7 +146,7 @@ out:
return dev;
}
-unsigned inet_addr_type(u32 addr)
+unsigned inet_addr_type(__be32 addr)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
struct fib_result res;
@@ -160,8 +180,8 @@ unsigned inet_addr_type(u32 addr)
- check, that packet arrived from expected physical interface.
*/
-int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
- struct net_device *dev, u32 *spec_dst, u32 *itag)
+int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
+ struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
struct in_device *in_dev;
struct flowi fl = { .nl_u = { .ip4_u =
@@ -233,42 +253,190 @@ e_inval:
#ifndef CONFIG_IP_NOSIOCRT
+static inline __be32 sk_extract_addr(struct sockaddr *addr)
+{
+ return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+}
+
+static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
+{
+ struct nlattr *nla;
+
+ nla = (struct nlattr *) ((char *) mx + len);
+ nla->nla_type = type;
+ nla->nla_len = nla_attr_size(4);
+ *(u32 *) nla_data(nla) = value;
+
+ return len + nla_total_size(4);
+}
+
+static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+ struct fib_config *cfg)
+{
+ __be32 addr;
+ int plen;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ if (rt->rt_dst.sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ /*
+ * Check mask for validity:
+ * a) it must be contiguous.
+ * b) destination must have all host bits clear.
+ * c) if application forgot to set correct family (AF_INET),
+ * reject request unless it is absolutely clear i.e.
+ * both family and mask are zero.
+ */
+ plen = 32;
+ addr = sk_extract_addr(&rt->rt_dst);
+ if (!(rt->rt_flags & RTF_HOST)) {
+ __be32 mask = sk_extract_addr(&rt->rt_genmask);
+
+ if (rt->rt_genmask.sa_family != AF_INET) {
+ if (mask || rt->rt_genmask.sa_family)
+ return -EAFNOSUPPORT;
+ }
+
+ if (bad_mask(mask, addr))
+ return -EINVAL;
+
+ plen = inet_mask_len(mask);
+ }
+
+ cfg->fc_dst_len = plen;
+ cfg->fc_dst = addr;
+
+ if (cmd != SIOCDELRT) {
+ cfg->fc_nlflags = NLM_F_CREATE;
+ cfg->fc_protocol = RTPROT_BOOT;
+ }
+
+ if (rt->rt_metric)
+ cfg->fc_priority = rt->rt_metric - 1;
+
+ if (rt->rt_flags & RTF_REJECT) {
+ cfg->fc_scope = RT_SCOPE_HOST;
+ cfg->fc_type = RTN_UNREACHABLE;
+ return 0;
+ }
+
+ cfg->fc_scope = RT_SCOPE_NOWHERE;
+ cfg->fc_type = RTN_UNICAST;
+
+ if (rt->rt_dev) {
+ char *colon;
+ struct net_device *dev;
+ char devname[IFNAMSIZ];
+
+ if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
+ return -EFAULT;
+
+ devname[IFNAMSIZ-1] = 0;
+ colon = strchr(devname, ':');
+ if (colon)
+ *colon = 0;
+ dev = __dev_get_by_name(devname);
+ if (!dev)
+ return -ENODEV;
+ cfg->fc_oif = dev->ifindex;
+ if (colon) {
+ struct in_ifaddr *ifa;
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev)
+ return -ENODEV;
+ *colon = ':';
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+ if (strcmp(ifa->ifa_label, devname) == 0)
+ break;
+ if (ifa == NULL)
+ return -ENODEV;
+ cfg->fc_prefsrc = ifa->ifa_local;
+ }
+ }
+
+ addr = sk_extract_addr(&rt->rt_gateway);
+ if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ cfg->fc_gw = addr;
+ if (rt->rt_flags & RTF_GATEWAY &&
+ inet_addr_type(addr) == RTN_UNICAST)
+ cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ }
+
+ if (cmd == SIOCDELRT)
+ return 0;
+
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+ return -EINVAL;
+
+ if (cfg->fc_scope == RT_SCOPE_NOWHERE)
+ cfg->fc_scope = RT_SCOPE_LINK;
+
+ if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
+ struct nlattr *mx;
+ int len = 0;
+
+ mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ if (mx == NULL)
+ return -ENOMEM;
+
+ if (rt->rt_flags & RTF_MTU)
+ len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
+
+ if (rt->rt_flags & RTF_WINDOW)
+ len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
+
+ if (rt->rt_flags & RTF_IRTT)
+ len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
+
+ cfg->fc_mx = mx;
+ cfg->fc_mx_len = len;
+ }
+
+ return 0;
+}
+
/*
* Handle IP routing ioctl calls. These are used to manipulate the routing tables
*/
int ip_rt_ioctl(unsigned int cmd, void __user *arg)
{
+ struct fib_config cfg;
+ struct rtentry rt;
int err;
- struct kern_rta rta;
- struct rtentry r;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
switch (cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+
+ if (copy_from_user(&rt, arg, sizeof(rt)))
return -EFAULT;
+
rtnl_lock();
- err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+ err = rtentry_to_fib_config(cmd, &rt, &cfg);
if (err == 0) {
+ struct fib_table *tb;
+
if (cmd == SIOCDELRT) {
- struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
- err = -ESRCH;
+ tb = fib_get_table(cfg.fc_table);
if (tb)
- err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ err = tb->tb_delete(tb, &cfg);
+ else
+ err = -ESRCH;
} else {
- struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
- err = -ENOBUFS;
+ tb = fib_new_table(cfg.fc_table);
if (tb)
- err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ err = tb->tb_insert(tb, &cfg);
+ else
+ err = -ENOBUFS;
}
- kfree(rta.rta_mx);
+
+ /* allocated by rtentry_to_fib_config() */
+ kfree(cfg.fc_mx);
}
rtnl_unlock();
return err;
@@ -285,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
#endif
-static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
+struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+ [RTA_DST] = { .type = NLA_U32 },
+ [RTA_SRC] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_OIF] = { .type = NLA_U32 },
+ [RTA_GATEWAY] = { .type = NLA_U32 },
+ [RTA_PRIORITY] = { .type = NLA_U32 },
+ [RTA_PREFSRC] = { .type = NLA_U32 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+ [RTA_PROTOINFO] = { .type = NLA_U32 },
+ [RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_MP_ALGO] = { .type = NLA_U32 },
+};
+
+static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct fib_config *cfg)
{
- int i;
-
- for (i=1; i<=RTA_MAX; i++, rta++) {
- struct rtattr *attr = *rta;
- if (attr) {
- if (RTA_PAYLOAD(attr) < 4)
- return -EINVAL;
- if (i != RTA_MULTIPATH && i != RTA_METRICS)
- *rta = (struct rtattr*)RTA_DATA(attr);
+ struct nlattr *attr;
+ int err, remaining;
+ struct rtmsg *rtm;
+
+ err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ rtm = nlmsg_data(nlh);
+ cfg->fc_family = rtm->rtm_family;
+ cfg->fc_dst_len = rtm->rtm_dst_len;
+ cfg->fc_src_len = rtm->rtm_src_len;
+ cfg->fc_tos = rtm->rtm_tos;
+ cfg->fc_table = rtm->rtm_table;
+ cfg->fc_protocol = rtm->rtm_protocol;
+ cfg->fc_scope = rtm->rtm_scope;
+ cfg->fc_type = rtm->rtm_type;
+ cfg->fc_flags = rtm->rtm_flags;
+ cfg->fc_nlflags = nlh->nlmsg_flags;
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
+
+ nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
+ switch (attr->nla_type) {
+ case RTA_DST:
+ cfg->fc_dst = nla_get_be32(attr);
+ break;
+ case RTA_SRC:
+ cfg->fc_src = nla_get_be32(attr);
+ break;
+ case RTA_OIF:
+ cfg->fc_oif = nla_get_u32(attr);
+ break;
+ case RTA_GATEWAY:
+ cfg->fc_gw = nla_get_be32(attr);
+ break;
+ case RTA_PRIORITY:
+ cfg->fc_priority = nla_get_u32(attr);
+ break;
+ case RTA_PREFSRC:
+ cfg->fc_prefsrc = nla_get_be32(attr);
+ break;
+ case RTA_METRICS:
+ cfg->fc_mx = nla_data(attr);
+ cfg->fc_mx_len = nla_len(attr);
+ break;
+ case RTA_MULTIPATH:
+ cfg->fc_mp = nla_data(attr);
+ cfg->fc_mp_len = nla_len(attr);
+ break;
+ case RTA_FLOW:
+ cfg->fc_flow = nla_get_u32(attr);
+ break;
+ case RTA_MP_ALGO:
+ cfg->fc_mp_alg = nla_get_u32(attr);
+ break;
+ case RTA_TABLE:
+ cfg->fc_table = nla_get_u32(attr);
+ break;
}
}
+
return 0;
+errout:
+ return err;
}
int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct fib_table * tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
- if (inet_check_attr(r, rta))
- return -EINVAL;
+ err = rtm_to_fib_config(skb, nlh, &cfg);
+ if (err < 0)
+ goto errout;
- tb = fib_get_table(r->rtm_table);
- if (tb)
- return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
- return -ESRCH;
+ tb = fib_get_table(cfg.fc_table);
+ if (tb == NULL) {
+ err = -ESRCH;
+ goto errout;
+ }
+
+ err = tb->tb_delete(tb, &cfg);
+errout:
+ return err;
}
int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct fib_table * tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
- if (inet_check_attr(r, rta))
- return -EINVAL;
+ err = rtm_to_fib_config(skb, nlh, &cfg);
+ if (err < 0)
+ goto errout;
- tb = fib_new_table(r->rtm_table);
- if (tb)
- return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
- return -ENOBUFS;
+ tb = fib_new_table(cfg.fc_table);
+ if (tb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = tb->tb_insert(tb, &cfg);
+errout:
+ return err;
}
int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- int t;
- int s_t;
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
struct fib_table *tb;
+ struct hlist_node *node;
+ int dumped = 0;
- if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
- ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+ if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
return ip_rt_dump(skb, cb);
- s_t = cb->args[0];
- if (s_t == 0)
- s_t = cb->args[0] = RT_TABLE_MIN;
-
- for (t=s_t; t<=RT_TABLE_MAX; t++) {
- if (t < s_t) continue;
- if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
- if ((tb = fib_get_table(t))==NULL)
- continue;
- if (tb->tb_dump(tb, skb, cb) < 0)
- break;
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+ for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+ e = 0;
+ hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+ if (e < s_e)
+ goto next;
+ if (dumped)
+ memset(&cb->args[2], 0, sizeof(cb->args) -
+ 2 * sizeof(cb->args[0]));
+ if (tb->tb_dump(tb, skb, cb) < 0)
+ goto out;
+ dumped = 1;
+next:
+ e++;
+ }
}
-
- cb->args[0] = t;
+out:
+ cb->args[1] = e;
+ cb->args[0] = h;
return skb->len;
}
@@ -367,17 +627,18 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
- struct fib_table * tb;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
- struct kern_rta rta;
-
- memset(&req.rtm, 0, sizeof(req.rtm));
- memset(&rta, 0, sizeof(rta));
+ struct fib_table *tb;
+ struct fib_config cfg = {
+ .fc_protocol = RTPROT_KERNEL,
+ .fc_type = type,
+ .fc_dst = dst,
+ .fc_dst_len = dst_len,
+ .fc_prefsrc = ifa->ifa_local,
+ .fc_oif = ifa->ifa_dev->dev->ifindex,
+ .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+ };
if (type == RTN_UNICAST)
tb = fib_new_table(RT_TABLE_MAIN);
@@ -387,26 +648,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
if (tb == NULL)
return;
- req.nlh.nlmsg_len = sizeof(req);
- req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
- req.nlh.nlmsg_pid = 0;
- req.nlh.nlmsg_seq = 0;
+ cfg.fc_table = tb->tb_id;
- req.rtm.rtm_dst_len = dst_len;
- req.rtm.rtm_table = tb->tb_id;
- req.rtm.rtm_protocol = RTPROT_KERNEL;
- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
- req.rtm.rtm_type = type;
-
- rta.rta_dst = &dst;
- rta.rta_prefsrc = &ifa->ifa_local;
- rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+ if (type != RTN_LOCAL)
+ cfg.fc_scope = RT_SCOPE_LINK;
+ else
+ cfg.fc_scope = RT_SCOPE_HOST;
if (cmd == RTM_NEWROUTE)
- tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->tb_insert(tb, &cfg);
else
- tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->tb_delete(tb, &cfg);
}
void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -414,9 +666,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
struct in_device *in_dev = ifa->ifa_dev;
struct net_device *dev = in_dev->dev;
struct in_ifaddr *prim = ifa;
- u32 mask = ifa->ifa_mask;
- u32 addr = ifa->ifa_local;
- u32 prefix = ifa->ifa_address&mask;
+ __be32 mask = ifa->ifa_mask;
+ __be32 addr = ifa->ifa_local;
+ __be32 prefix = ifa->ifa_address&mask;
if (ifa->ifa_flags&IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, prefix, mask);
@@ -432,7 +684,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
@@ -454,8 +706,8 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
struct net_device *dev = in_dev->dev;
struct in_ifaddr *ifa1;
struct in_ifaddr *prim = ifa;
- u32 brd = ifa->ifa_address|~ifa->ifa_mask;
- u32 any = ifa->ifa_address&ifa->ifa_mask;
+ __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
+ __be32 any = ifa->ifa_address&ifa->ifa_mask;
#define LOCAL_OK 1
#define BRD_OK 2
#define BRD0_OK 4
@@ -653,11 +905,17 @@ static struct notifier_block fib_netdev_notifier = {
void __init ip_fib_init(void)
{
+ unsigned int i;
+
+ for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+ INIT_HLIST_HEAD(&fib_table_hash[i]);
#ifndef CONFIG_IP_MULTIPLE_TABLES
ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+ hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+ hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
#else
- fib_rules_init();
+ fib4_rules_init();
#endif
register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index e2890ec8159e..107bb6cbb0b3 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -15,7 +15,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
@@ -52,7 +51,7 @@ static kmem_cache_t *fn_alias_kmem __read_mostly;
struct fib_node {
struct hlist_node fn_hash;
struct list_head fn_alias;
- u32 fn_key;
+ __be32 fn_key;
};
struct fn_zone {
@@ -65,7 +64,7 @@ struct fn_zone {
#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
int fz_order; /* Zone order */
- u32 fz_mask;
+ __be32 fz_mask;
#define FZ_MASK(fz) ((fz)->fz_mask)
};
@@ -78,7 +77,7 @@ struct fn_hash {
struct fn_zone *fn_zone_list;
};
-static inline u32 fn_hash(u32 key, struct fn_zone *fz)
+static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
{
u32 h = ntohl(key)>>(32 - fz->fz_order);
h ^= (h>>20);
@@ -88,7 +87,7 @@ static inline u32 fn_hash(u32 key, struct fn_zone *fz)
return h;
}
-static inline u32 fz_key(u32 dst, struct fn_zone *fz)
+static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
{
return dst & FZ_MASK(fz);
}
@@ -205,11 +204,10 @@ static struct fn_zone *
fn_new_zone(struct fn_hash *table, int z)
{
int i;
- struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
+ struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
if (!fz)
return NULL;
- memset(fz, 0, sizeof(struct fn_zone));
if (z) {
fz->fz_divisor = 16;
} else {
@@ -256,7 +254,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
struct hlist_head *head;
struct hlist_node *node;
struct fib_node *f;
- u32 k = fz_key(flp->fl4_dst, fz);
+ __be32 k = fz_key(flp->fl4_dst, fz);
head = &fz->fz_hash[fn_hash(k, fz)];
hlist_for_each_entry(f, node, head, fn_hash) {
@@ -367,7 +365,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
}
/* Return the node in FZ matching KEY. */
-static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
+static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
{
struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
struct hlist_node *node;
@@ -381,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
return NULL;
}
-static int
-fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
struct fib_node *new_f, *f;
struct fib_alias *fa, *new_fa;
struct fn_zone *fz;
struct fib_info *fi;
- int z = r->rtm_dst_len;
- int type = r->rtm_type;
- u8 tos = r->rtm_tos;
- u32 key;
+ u8 tos = cfg->fc_tos;
+ __be32 key;
int err;
- if (z > 32)
+ if (cfg->fc_dst_len > 32)
return -EINVAL;
- fz = table->fn_zones[z];
- if (!fz && !(fz = fn_new_zone(table, z)))
+
+ fz = table->fn_zones[cfg->fc_dst_len];
+ if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
return -ENOBUFS;
key = 0;
- if (rta->rta_dst) {
- u32 dst;
- memcpy(&dst, rta->rta_dst, 4);
- if (dst & ~FZ_MASK(fz))
+ if (cfg->fc_dst) {
+ if (cfg->fc_dst & ~FZ_MASK(fz))
return -EINVAL;
- key = fz_key(dst, fz);
+ key = fz_key(cfg->fc_dst, fz);
}
- if ((fi = fib_create_info(r, rta, n, &err)) == NULL)
- return err;
+ fi = fib_create_info(cfg);
+ if (IS_ERR(fi))
+ return PTR_ERR(fi);
if (fz->fz_nent > (fz->fz_divisor<<1) &&
fz->fz_divisor < FZ_MAX_DIVISOR &&
- (z==32 || (1<<z) > fz->fz_divisor))
+ (cfg->fc_dst_len == 32 ||
+ (1 << cfg->fc_dst_len) > fz->fz_divisor))
fn_rehash_zone(fz);
f = fib_find_node(fz, key);
@@ -442,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct fib_alias *fa_orig;
err = -EEXIST;
- if (n->nlmsg_flags & NLM_F_EXCL)
+ if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
- if (n->nlmsg_flags & NLM_F_REPLACE) {
+ if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
write_lock_bh(&fib_hash_lock);
fi_drop = fa->fa_info;
fa->fa_info = fi;
- fa->fa_type = type;
- fa->fa_scope = r->rtm_scope;
+ fa->fa_type = cfg->fc_type;
+ fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
fa->fa_state &= ~FA_S_ACCESSED;
fib_hash_genid++;
@@ -476,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
- if (fa->fa_type == type &&
- fa->fa_scope == r->rtm_scope &&
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi)
goto out;
}
- if (!(n->nlmsg_flags & NLM_F_APPEND))
+ if (!(cfg->fc_nlflags & NLM_F_APPEND))
fa = fa_orig;
}
err = -ENOENT;
- if (!(n->nlmsg_flags&NLM_F_CREATE))
+ if (!(cfg->fc_nlflags & NLM_F_CREATE))
goto out;
err = -ENOBUFS;
@@ -508,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
@@ -528,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
fz->fz_nent++;
rt_cache_flush(-1);
- rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+ rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
+ &cfg->fc_nlinfo);
return 0;
out_free_new_fa:
@@ -539,30 +535,25 @@ out:
}
-static int
-fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash*)tb->tb_data;
struct fib_node *f;
struct fib_alias *fa, *fa_to_delete;
- int z = r->rtm_dst_len;
struct fn_zone *fz;
- u32 key;
- u8 tos = r->rtm_tos;
+ __be32 key;
- if (z > 32)
+ if (cfg->fc_dst_len > 32)
return -EINVAL;
- if ((fz = table->fn_zones[z]) == NULL)
+
+ if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
return -ESRCH;
key = 0;
- if (rta->rta_dst) {
- u32 dst;
- memcpy(&dst, rta->rta_dst, 4);
- if (dst & ~FZ_MASK(fz))
+ if (cfg->fc_dst) {
+ if (cfg->fc_dst & ~FZ_MASK(fz))
return -EINVAL;
- key = fz_key(dst, fz);
+ key = fz_key(cfg->fc_dst, fz);
}
f = fib_find_node(fz, key);
@@ -570,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (!f)
fa = NULL;
else
- fa = fib_find_alias(&f->fn_alias, tos, 0);
+ fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
if (!fa)
return -ESRCH;
@@ -579,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
struct fib_info *fi = fa->fa_info;
- if (fa->fa_tos != tos)
+ if (fa->fa_tos != cfg->fc_tos)
break;
- if ((!r->rtm_type ||
- fa->fa_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- fib_nh_match(r, n, rta, fi) == 0) {
+ if ((!cfg->fc_type ||
+ fa->fa_type == cfg->fc_type) &&
+ (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+ fa->fa_scope == cfg->fc_scope) &&
+ (!cfg->fc_protocol ||
+ fi->fib_protocol == cfg->fc_protocol) &&
+ fib_nh_match(cfg, fi) == 0) {
fa_to_delete = fa;
break;
}
@@ -598,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
int kill_fn;
fa = fa_to_delete;
- rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+ rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
+ tb->tb_id, &cfg->fc_nlinfo);
kill_fn = 0;
write_lock_bh(&fib_hash_lock);
@@ -686,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
struct fib_node *f;
int i, s_i;
- s_i = cb->args[3];
+ s_i = cb->args[4];
i = 0;
hlist_for_each_entry(f, node, head, fn_hash) {
struct fib_alias *fa;
@@ -701,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
tb->tb_id,
fa->fa_type,
fa->fa_scope,
- &f->fn_key,
+ f->fn_key,
fz->fz_order,
fa->fa_tos,
fa->fa_info,
NLM_F_MULTI) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
next:
i++;
}
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -724,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
{
int h, s_h;
- s_h = cb->args[2];
+ s_h = cb->args[3];
for (h=0; h < fz->fz_divisor; h++) {
if (h < s_h) continue;
if (h > s_h)
- memset(&cb->args[3], 0,
- sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
if (fz->fz_hash == NULL ||
hlist_empty(&fz->fz_hash[h]))
continue;
if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
- cb->args[2] = h;
+ cb->args[3] = h;
return -1;
}
}
- cb->args[2] = h;
+ cb->args[3] = h;
return skb->len;
}
@@ -748,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
struct fn_zone *fz;
struct fn_hash *table = (struct fn_hash*)tb->tb_data;
- s_m = cb->args[1];
+ s_m = cb->args[2];
read_lock(&fib_hash_lock);
for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
if (m < s_m) continue;
if (m > s_m)
- memset(&cb->args[2], 0,
- sizeof(cb->args) - 2*sizeof(cb->args[0]));
+ memset(&cb->args[3], 0,
+ sizeof(cb->args) - 3*sizeof(cb->args[0]));
if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
- cb->args[1] = m;
+ cb->args[2] = m;
read_unlock(&fib_hash_lock);
return -1;
}
}
read_unlock(&fib_hash_lock);
- cb->args[1] = m;
+ cb->args[2] = m;
return skb->len;
}
#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
#else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
#endif
{
struct fib_table *tb;
@@ -974,7 +966,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v)
read_unlock(&fib_hash_lock);
}
-static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
+static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{
static const unsigned type2flags[RTN_MAX + 1] = {
[7] = RTF_REJECT, [8] = RTF_REJECT,
@@ -983,7 +975,7 @@ static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
if (fi && fi->fib_nh->nh_gw)
flags |= RTF_GATEWAY;
- if (mask == 0xFFFFFFFF)
+ if (mask == htonl(0xFFFFFFFF))
flags |= RTF_HOST;
flags |= RTF_UP;
return flags;
@@ -999,7 +991,7 @@ static int fib_seq_show(struct seq_file *seq, void *v)
{
struct fib_iter_state *iter;
char bf[128];
- u32 prefix, mask;
+ __be32 prefix, mask;
unsigned flags;
struct fib_node *f;
struct fib_alias *fa;
@@ -1047,7 +1039,7 @@ static int fib_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1058,7 +1050,6 @@ static int fib_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609ea0eb7..0e8b70bad4e1 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -20,22 +20,17 @@ struct fib_alias {
/* Exported by fib_semantics.c */
extern int fib_semantic_match(struct list_head *head,
const struct flowi *flp,
- struct fib_result *res, __u32 zone, __u32 mask,
+ struct fib_result *res, __be32 zone, __be32 mask,
int prefixlen);
extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(const struct rtmsg *r,
- struct kern_rta *rta,
- const struct nlmsghdr *,
- int *err);
-extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
- struct kern_rta *rta, struct fib_info *fi);
+extern struct fib_info *fib_create_info(struct fib_config *cfg);
+extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u8 tb_id, u8 type, u8 scope, void *dst,
+ u32 tb_id, u8 type, u8 scope, __be32 dst,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
-extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
- int z, int tb_id,
- struct nlmsghdr *n, struct netlink_skb_parms *req);
+extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
+ int dst_len, u32 tb_id, struct nl_info *info);
extern struct fib_alias *fib_find_alias(struct list_head *fah,
u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ec566f3e66c7..0852b9cd065a 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
*
* IPv4 Forwarding Information Base: policy rules.
*
- * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Thomas Graf <tgraf@suug.ch>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,465 +18,350 @@
* Marc Boucher : routing by fwmark
*/
-#include <linux/config.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
-
#include <net/ip.h>
-#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
-#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_rules.h>
-#define FRprintk(a...)
+static struct fib_rules_ops fib4_rules_ops;
-struct fib_rule
+struct fib4_rule
{
- struct hlist_node hlist;
- atomic_t r_clntref;
- u32 r_preference;
- unsigned char r_table;
- unsigned char r_action;
- unsigned char r_dst_len;
- unsigned char r_src_len;
- u32 r_src;
- u32 r_srcmask;
- u32 r_dst;
- u32 r_dstmask;
- u32 r_srcmap;
- u8 r_flags;
- u8 r_tos;
+ struct fib_rule common;
+ u8 dst_len;
+ u8 src_len;
+ u8 tos;
+ __be32 src;
+ __be32 srcmask;
+ __be32 dst;
+ __be32 dstmask;
#ifdef CONFIG_IP_ROUTE_FWMARK
- u32 r_fwmark;
+ u32 fwmark;
+ u32 fwmask;
#endif
- int r_ifindex;
#ifdef CONFIG_NET_CLS_ROUTE
- __u32 r_tclassid;
+ u32 tclassid;
#endif
- char r_ifname[IFNAMSIZ];
- int r_dead;
- struct rcu_head rcu;
};
-static struct fib_rule default_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_preference = 0x7FFF,
- .r_table = RT_TABLE_DEFAULT,
- .r_action = RTN_UNICAST,
+static struct fib4_rule default_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7FFF,
+ .table = RT_TABLE_DEFAULT,
+ .action = FR_ACT_TO_TBL,
+ },
};
-static struct fib_rule main_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_preference = 0x7FFE,
- .r_table = RT_TABLE_MAIN,
- .r_action = RTN_UNICAST,
+static struct fib4_rule main_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7FFE,
+ .table = RT_TABLE_MAIN,
+ .action = FR_ACT_TO_TBL,
+ },
};
-static struct fib_rule local_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_table = RT_TABLE_LOCAL,
- .r_action = RTN_UNICAST,
+static struct fib4_rule local_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .table = RT_TABLE_LOCAL,
+ .action = FR_ACT_TO_TBL,
+ .flags = FIB_RULE_PERMANENT,
+ },
};
-static struct hlist_head fib_rules;
+static LIST_HEAD(fib4_rules);
-/* writer func called from netlink -- rtnl_sem hold*/
-
-static void rtmsg_rule(int, struct fib_rule *);
-
-int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+#ifdef CONFIG_NET_CLS_ROUTE
+u32 fib_rules_tclass(struct fib_result *res)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct fib_rule *r;
- struct hlist_node *node;
- int err = -ESRCH;
-
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
- rtm->rtm_src_len == r->r_src_len &&
- rtm->rtm_dst_len == r->r_dst_len &&
- (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
- rtm->rtm_tos == r->r_tos &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
- (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
- (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
- (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
- (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
- (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
- err = -EPERM;
- if (r == &local_rule)
- break;
-
- hlist_del_rcu(&r->hlist);
- r->r_dead = 1;
- rtmsg_rule(RTM_DELRULE, r);
- fib_rule_put(r);
- err = 0;
- break;
- }
- }
- return err;
+ return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
}
+#endif
-/* Allocate new unique table id */
-
-static struct fib_table *fib_empty_table(void)
+int fib_lookup(struct flowi *flp, struct fib_result *res)
{
- int id;
+ struct fib_lookup_arg arg = {
+ .result = res,
+ };
+ int err;
- for (id = 1; id <= RT_TABLE_MAX; id++)
- if (fib_tables[id] == NULL)
- return __fib_new_table(id);
- return NULL;
-}
+ err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+ res->r = arg.rule;
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
- struct fib_rule *r = container_of(head, struct fib_rule, rcu);
- kfree(r);
+ return err;
}
-void fib_rule_put(struct fib_rule *r)
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
{
- if (atomic_dec_and_test(&r->r_clntref)) {
- if (r->r_dead)
- call_rcu(&r->rcu, fib_rule_put_rcu);
- else
- printk("Freeing alive rule %p\n", r);
+ int err = -EAGAIN;
+ struct fib_table *tbl;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+
+ case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
+ goto errout;
+
+ case FR_ACT_PROHIBIT:
+ err = -EACCES;
+ goto errout;
+
+ case FR_ACT_BLACKHOLE:
+ default:
+ err = -EINVAL;
+ goto errout;
}
+
+ if ((tbl = fib_get_table(rule->table)) == NULL)
+ goto errout;
+
+ err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+ if (err > 0)
+ err = -EAGAIN;
+errout:
+ return err;
}
-/* writer func called from netlink -- rtnl_sem hold*/
-int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct fib_rule *r, *new_r, *last = NULL;
- struct hlist_node *node = NULL;
- unsigned char table_id;
-
- if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
- (rtm->rtm_tos & ~IPTOS_TOS_MASK))
- return -EINVAL;
-
- if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
- return -EINVAL;
-
- table_id = rtm->rtm_table;
- if (table_id == RT_TABLE_UNSPEC) {
- struct fib_table *table;
- if (rtm->rtm_type == RTN_UNICAST) {
- if ((table = fib_empty_table()) == NULL)
- return -ENOBUFS;
- table_id = table->tb_id;
- }
+ if (res->r && res->r->action == FR_ACT_TO_TBL &&
+ FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+ struct fib_table *tb;
+ if ((tb = fib_get_table(res->r->table)) != NULL)
+ tb->tb_select_default(tb, flp, res);
}
+}
- new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
- if (!new_r)
- return -ENOMEM;
- memset(new_r, 0, sizeof(*new_r));
-
- if (rta[RTA_SRC-1])
- memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
- if (rta[RTA_DST-1])
- memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
- if (rta[RTA_GATEWAY-1])
- memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
- new_r->r_src_len = rtm->rtm_src_len;
- new_r->r_dst_len = rtm->rtm_dst_len;
- new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
- new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
- new_r->r_tos = rtm->rtm_tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if (rta[RTA_PROTOINFO-1])
- memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
-#endif
- new_r->r_action = rtm->rtm_type;
- new_r->r_flags = rtm->rtm_flags;
- if (rta[RTA_PRIORITY-1])
- memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
- new_r->r_table = table_id;
- if (rta[RTA_IIF-1]) {
- struct net_device *dev;
- rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
- new_r->r_ifindex = -1;
- dev = __dev_get_by_name(new_r->r_ifname);
- if (dev)
- new_r->r_ifindex = dev->ifindex;
- }
-#ifdef CONFIG_NET_CLS_ROUTE
- if (rta[RTA_FLOW-1])
- memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
-#endif
- r = container_of(fib_rules.first, struct fib_rule, hlist);
+static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+ struct fib4_rule *r = (struct fib4_rule *) rule;
+ __be32 daddr = fl->fl4_dst;
+ __be32 saddr = fl->fl4_src;
- if (!new_r->r_preference) {
- if (r && r->hlist.next != NULL) {
- r = container_of(r->hlist.next, struct fib_rule, hlist);
- if (r->r_preference)
- new_r->r_preference = r->r_preference - 1;
- }
- }
+ if (((saddr ^ r->src) & r->srcmask) ||
+ ((daddr ^ r->dst) & r->dstmask))
+ return 0;
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_preference > new_r->r_preference)
- break;
- last = r;
- }
- atomic_inc(&new_r->r_clntref);
+ if (r->tos && (r->tos != fl->fl4_tos))
+ return 0;
- if (last)
- hlist_add_after_rcu(&last->hlist, &new_r->hlist);
- else
- hlist_add_before_rcu(&new_r->hlist, &r->hlist);
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
+ return 0;
+#endif
- rtmsg_rule(RTM_NEWRULE, new_r);
- return 0;
+ return 1;
}
-#ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
+static struct fib_table *fib_empty_table(void)
{
- if (res->r)
- return res->r->r_tclassid;
- return 0;
+ u32 id;
+
+ for (id = 1; id <= RT_TABLE_MAX; id++)
+ if (fib_get_table(id) == NULL)
+ return fib_new_table(id);
+ return NULL;
}
-#endif
-/* callers should hold rtnl semaphore */
+static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+ [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+ [FRA_PRIORITY] = { .type = NLA_U32 },
+ [FRA_SRC] = { .type = NLA_U32 },
+ [FRA_DST] = { .type = NLA_U32 },
+ [FRA_FWMARK] = { .type = NLA_U32 },
+ [FRA_FWMASK] = { .type = NLA_U32 },
+ [FRA_FLOW] = { .type = NLA_U32 },
+ [FRA_TABLE] = { .type = NLA_U32 },
+};
-static void fib_rules_detach(struct net_device *dev)
+static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
{
- struct hlist_node *node;
- struct fib_rule *r;
+ int err = -EINVAL;
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+ if (frh->src_len > 32 || frh->dst_len > 32 ||
+ (frh->tos & ~IPTOS_TOS_MASK))
+ goto errout;
+
+ if (rule->table == RT_TABLE_UNSPEC) {
+ if (rule->action == FR_ACT_TO_TBL) {
+ struct fib_table *table;
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_ifindex == dev->ifindex)
- r->r_ifindex = -1;
+ table = fib_empty_table();
+ if (table == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+ rule->table = table->tb_id;
+ }
}
-}
-/* callers should hold rtnl semaphore */
+ if (tb[FRA_SRC])
+ rule4->src = nla_get_be32(tb[FRA_SRC]);
-static void fib_rules_attach(struct net_device *dev)
-{
- struct hlist_node *node;
- struct fib_rule *r;
+ if (tb[FRA_DST])
+ rule4->dst = nla_get_be32(tb[FRA_DST]);
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
- r->r_ifindex = dev->ifindex;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ if (tb[FRA_FWMARK]) {
+ rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+ if (rule4->fwmark)
+ /* compatibility: if the mark value is non-zero all bits
+ * are compared unless a mask is explicitly specified.
+ */
+ rule4->fwmask = 0xFFFFFFFF;
}
+
+ if (tb[FRA_FWMASK])
+ rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (tb[FRA_FLOW])
+ rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
+#endif
+
+ rule4->src_len = frh->src_len;
+ rule4->srcmask = inet_make_mask(rule4->src_len);
+ rule4->dst_len = frh->dst_len;
+ rule4->dstmask = inet_make_mask(rule4->dst_len);
+ rule4->tos = frh->tos;
+
+ err = 0;
+errout:
+ return err;
}
-int fib_lookup(const struct flowi *flp, struct fib_result *res)
+static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
{
- int err;
- struct fib_rule *r, *policy;
- struct fib_table *tb;
- struct hlist_node *node;
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- u32 daddr = flp->fl4_dst;
- u32 saddr = flp->fl4_src;
+ if (frh->src_len && (rule4->src_len != frh->src_len))
+ return 0;
-FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
- NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+ if (frh->dst_len && (rule4->dst_len != frh->dst_len))
+ return 0;
- rcu_read_lock();
+ if (frh->tos && (rule4->tos != frh->tos))
+ return 0;
- hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
- if (((saddr^r->r_src) & r->r_srcmask) ||
- ((daddr^r->r_dst) & r->r_dstmask) ||
- (r->r_tos && r->r_tos != flp->fl4_tos) ||
#ifdef CONFIG_IP_ROUTE_FWMARK
- (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+ if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+ return 0;
+
+ if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+ return 0;
#endif
- (r->r_ifindex && r->r_ifindex != flp->iif))
- continue;
-
-FRprintk("tb %d r %d ", r->r_table, r->r_action);
- switch (r->r_action) {
- case RTN_UNICAST:
- policy = r;
- break;
- case RTN_UNREACHABLE:
- rcu_read_unlock();
- return -ENETUNREACH;
- default:
- case RTN_BLACKHOLE:
- rcu_read_unlock();
- return -EINVAL;
- case RTN_PROHIBIT:
- rcu_read_unlock();
- return -EACCES;
- }
- if ((tb = fib_get_table(r->r_table)) == NULL)
- continue;
- err = tb->tb_lookup(tb, flp, res);
- if (err == 0) {
- res->r = policy;
- if (policy)
- atomic_inc(&policy->r_clntref);
- rcu_read_unlock();
- return 0;
- }
- if (err < 0 && err != -EAGAIN) {
- rcu_read_unlock();
- return err;
- }
- }
-FRprintk("FAILURE\n");
- rcu_read_unlock();
- return -ENETUNREACH;
-}
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
+ return 0;
+#endif
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
- if (res->r && res->r->r_action == RTN_UNICAST &&
- FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
- struct fib_table *tb;
- if ((tb = fib_get_table(res->r->r_table)) != NULL)
- tb->tb_select_default(tb, flp, res);
- }
-}
+ if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC])))
+ return 0;
-static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
+ if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST])))
+ return 0;
- if (event == NETDEV_UNREGISTER)
- fib_rules_detach(dev);
- else if (event == NETDEV_REGISTER)
- fib_rules_attach(dev);
- return NOTIFY_DONE;
+ return 1;
}
+static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
-static struct notifier_block fib_rules_notifier = {
- .notifier_call =fib_rules_event,
-};
+ frh->family = AF_INET;
+ frh->dst_len = rule4->dst_len;
+ frh->src_len = rule4->src_len;
+ frh->tos = rule4->tos;
-static __inline__ int inet_fill_rule(struct sk_buff *skb,
- struct fib_rule *r,
- u32 pid, u32 seq, int event,
- unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
- rtm->rtm_family = AF_INET;
- rtm->rtm_dst_len = r->r_dst_len;
- rtm->rtm_src_len = r->r_src_len;
- rtm->rtm_tos = r->r_tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
- if (r->r_fwmark)
- RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+ if (rule4->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
+
+ if (rule4->fwmask || rule4->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
#endif
- rtm->rtm_table = r->r_table;
- rtm->rtm_protocol = 0;
- rtm->rtm_scope = 0;
- rtm->rtm_type = r->r_action;
- rtm->rtm_flags = r->r_flags;
-
- if (r->r_dst_len)
- RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
- if (r->r_src_len)
- RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
- if (r->r_ifname[0])
- RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
- if (r->r_preference)
- RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
- if (r->r_srcmap)
- RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+
+ if (rule4->dst_len)
+ NLA_PUT_BE32(skb, FRA_DST, rule4->dst);
+
+ if (rule4->src_len)
+ NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
+
#ifdef CONFIG_NET_CLS_ROUTE
- if (r->r_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+ if (rule4->tclassid)
+ NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ return 0;
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return -ENOBUFS;
}
-/* callers should hold rtnl semaphore */
-
-static void rtmsg_rule(int event, struct fib_rule *r)
+int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
- struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
-
- if (!skb)
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
- else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
- kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
- } else {
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
- }
+ return fib_rules_dump(skb, cb, AF_INET);
}
-int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static u32 fib4_rule_default_pref(void)
{
- int idx = 0;
- int s_idx = cb->args[0];
- struct fib_rule *r;
- struct hlist_node *node;
-
- rcu_read_lock();
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
-
- if (idx < s_idx)
- continue;
- if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_NEWRULE, NLM_F_MULTI) < 0)
- break;
- idx++;
+ struct list_head *pos;
+ struct fib_rule *rule;
+
+ if (!list_empty(&fib4_rules)) {
+ pos = fib4_rules.next;
+ if (pos->next != &fib4_rules) {
+ rule = list_entry(pos->next, struct fib_rule, list);
+ if (rule->pref)
+ return rule->pref - 1;
+ }
}
- rcu_read_unlock();
- cb->args[0] = idx;
- return skb->len;
+ return 0;
}
-void __init fib_rules_init(void)
+static struct fib_rules_ops fib4_rules_ops = {
+ .family = AF_INET,
+ .rule_size = sizeof(struct fib4_rule),
+ .action = fib4_rule_action,
+ .match = fib4_rule_match,
+ .configure = fib4_rule_configure,
+ .compare = fib4_rule_compare,
+ .fill = fib4_rule_fill,
+ .default_pref = fib4_rule_default_pref,
+ .nlgroup = RTNLGRP_IPV4_RULE,
+ .policy = fib4_rule_policy,
+ .rules_list = &fib4_rules,
+ .owner = THIS_MODULE,
+};
+
+void __init fib4_rules_init(void)
{
- INIT_HLIST_HEAD(&fib_rules);
- hlist_add_head(&local_rule.hlist, &fib_rules);
- hlist_add_after(&local_rule.hlist, &main_rule.hlist);
- hlist_add_after(&main_rule.hlist, &default_rule.hlist);
- register_netdevice_notifier(&fib_rules_notifier);
+ list_add_tail(&local_rule.common.list, &fib4_rules);
+ list_add_tail(&main_rule.common.list, &fib4_rules);
+ list_add_tail(&default_rule.common.list, &fib4_rules);
+
+ fib_rules_register(&fib4_rules_ops);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0f4145babb14..884d176e0082 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -15,7 +15,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
@@ -34,7 +33,6 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <linux/init.h>
#include <net/arp.h>
@@ -45,12 +43,14 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip_mp_alg.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
#include "fib_lookup.h"
#define FSprintk(a...)
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
@@ -160,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
void fib_release_info(struct fib_info *fi)
{
- write_lock(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
if (fi && --fi->fib_treeref == 0) {
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
@@ -173,7 +173,7 @@ void fib_release_info(struct fib_info *fi)
fi->fib_dead = 1;
fib_info_put(fi);
}
- write_unlock(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
}
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -203,7 +203,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
unsigned int val = fi->fib_nhs;
val ^= fi->fib_protocol;
- val ^= fi->fib_prefsrc;
+ val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
@@ -248,14 +248,14 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
Used only by redirect accept routine.
*/
-int ip_fib_check_default(u32 gw, struct net_device *dev)
+int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct hlist_node *node;
struct fib_nh *nh;
unsigned int hash;
- read_lock(&fib_info_lock);
+ spin_lock(&fib_info_lock);
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
@@ -263,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
if (nh->nh_dev == dev &&
nh->nh_gw == gw &&
!(nh->nh_flags&RTNH_F_DEAD)) {
- read_unlock(&fib_info_lock);
+ spin_unlock(&fib_info_lock);
return 0;
}
}
- read_unlock(&fib_info_lock);
+ spin_unlock(&fib_info_lock);
return -1;
}
-void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
- int z, int tb_id,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
+ int dst_len, u32 tb_id, struct nl_info *info)
{
struct sk_buff *skb;
- u32 pid = req ? req->pid : n->nlmsg_pid;
- int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb)
- return;
-
- if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
- fa->fa_type, fa->fa_scope, &key, z,
- fa->fa_tos,
- fa->fa_info, 0) < 0) {
+ int payload = sizeof(struct rtmsg) + 256;
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+ err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+ fa->fa_type, fa->fa_scope, key, dst_len,
+ fa->fa_tos, fa->fa_info, 0);
+ if (err < 0) {
kfree_skb(skb);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
- if (n->nlmsg_flags&NLM_F_ECHO)
- atomic_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
- if (n->nlmsg_flags&NLM_F_ECHO)
- netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+ err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+ info->nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
}
/* Return the first fib alias matching TOS with
@@ -343,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
- while (RTA_OK(attr,attrlen)) {
- if (attr->rta_type == type)
- return *(u32*)RTA_DATA(attr);
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
int nhs = 0;
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
- while (nhlen >= (int)sizeof(struct rtnexthop)) {
- if ((nhlen -= nhp->rtnh_len) < 0)
- return 0;
+ while (rtnh_ok(rtnh, remaining)) {
nhs++;
- nhp = RTNH_NEXT(nhp);
- };
- return nhs;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ /* leftover implies invalid nexthop configuration, discard it */
+ return remaining > 0 ? 0 : nhs;
}
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ int remaining, struct fib_config *cfg)
{
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
-
change_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ int attrlen;
+
+ if (!rtnh_ok(rtnh, remaining))
return -EINVAL;
- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
- nh->nh_oif = nhp->rtnh_ifindex;
- nh->nh_weight = nhp->rtnh_hops + 1;
- if (attrlen) {
- nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+ nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+ nh->nh_oif = rtnh->rtnh_ifindex;
+ nh->nh_weight = rtnh->rtnh_hops + 1;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ nh->nh_gw = nla ? nla_get_be32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
- nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+ nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
}
- nhp = RTNH_NEXT(nhp);
+
+ rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
+
return 0;
}
#endif
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
- struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- struct rtnexthop *nhp;
- int nhlen;
+ struct rtnexthop *rtnh;
+ int remaining;
#endif
- if (rta->rta_priority &&
- *rta->rta_priority != fi->fib_priority)
+ if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
- if (rta->rta_oif || rta->rta_gw) {
- if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
- (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+ if (cfg->fc_oif || cfg->fc_gw) {
+ if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+ (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
return 1;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (rta->rta_mp == NULL)
+ if (cfg->fc_mp == NULL)
return 0;
- nhp = RTA_DATA(rta->rta_mp);
- nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+ rtnh = cfg->fc_mp;
+ remaining = cfg->fc_mp_len;
for_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- u32 gw;
+ int attrlen;
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ if (!rtnh_ok(rtnh, remaining))
return -EINVAL;
- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+ if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
return 1;
- if (attrlen) {
- gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
- if (gw && gw != nh->nh_gw)
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen < 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla && nla_get_be32(nla) != nh->nh_gw)
return 1;
#ifdef CONFIG_NET_CLS_ROUTE
- gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
- if (gw && gw != nh->nh_tclassid)
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+ if (nla && nla_get_u32(nla) != nh->nh_tclassid)
return 1;
#endif
}
- nhp = RTNH_NEXT(nhp);
+
+ rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
#endif
return 0;
@@ -489,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
|-> {local prefix} (terminal node)
*/
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+ struct fib_nh *nh)
{
int err;
@@ -503,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
if (nh->nh_flags&RTNH_F_ONLINK) {
struct net_device *dev;
- if (r->rtm_scope >= RT_SCOPE_LINK)
+ if (cfg->fc_scope >= RT_SCOPE_LINK)
return -EINVAL;
if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
return -EINVAL;
@@ -517,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
return 0;
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = nh->nh_gw,
- .scope = r->rtm_scope + 1 } },
- .oif = nh->nh_oif };
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = nh->nh_gw,
+ .scope = cfg->fc_scope + 1,
+ },
+ },
+ .oif = nh->nh_oif,
+ };
/* It is not necessary, but requires a bit of thinking */
if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -564,11 +568,11 @@ out:
return 0;
}
-static inline unsigned int fib_laddr_hashfn(u32 val)
+static inline unsigned int fib_laddr_hashfn(__be32 val)
{
unsigned int mask = (fib_hash_size - 1);
- return (val ^ (val >> 7) ^ (val >> 14)) & mask;
+ return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
}
static struct hlist_head *fib_hash_alloc(int bytes)
@@ -599,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_hash_size;
unsigned int i, bytes;
- write_lock(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_hash_size = new_size;
@@ -640,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
}
fib_info_laddrhash = new_laddrhash;
- write_unlock(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
bytes = old_size * sizeof(struct hlist_head *);
fib_hash_free(old_info_hash, bytes);
fib_hash_free(old_laddrhash, bytes);
}
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
- const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
struct fib_info *fi = NULL;
struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
int nhs = 1;
-#else
- const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- u32 mp_alg = IP_MP_ALG_NONE;
-#endif
/* Fast check to catch the most weird cases */
- if (fib_props[r->rtm_type].scope > r->rtm_scope)
+ if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
goto err_inval;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (rta->rta_mp) {
- nhs = fib_count_nexthops(rta->rta_mp);
+ if (cfg->fc_mp) {
+ nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
if (nhs == 0)
goto err_inval;
}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (rta->rta_mp_alg) {
- mp_alg = *rta->rta_mp_alg;
-
- if (mp_alg < IP_MP_ALG_NONE ||
- mp_alg > IP_MP_ALG_MAX)
+ if (cfg->fc_mp_alg) {
+ if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+ cfg->fc_mp_alg > IP_MP_ALG_MAX)
goto err_inval;
}
#endif
@@ -710,49 +703,47 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
goto failure;
}
- fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
fib_info_cnt++;
- memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
- fi->fib_protocol = r->rtm_protocol;
+ fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_flags = cfg->fc_flags;
+ fi->fib_priority = cfg->fc_priority;
+ fi->fib_prefsrc = cfg->fc_prefsrc;
fi->fib_nhs = nhs;
change_nexthops(fi) {
nh->nh_parent = fi;
} endfor_nexthops(fi)
- fi->fib_flags = r->rtm_flags;
- if (rta->rta_priority)
- fi->fib_priority = *rta->rta_priority;
- if (rta->rta_mx) {
- int attrlen = RTA_PAYLOAD(rta->rta_mx);
- struct rtattr *attr = RTA_DATA(rta->rta_mx);
-
- while (RTA_OK(attr, attrlen)) {
- unsigned flavor = attr->rta_type;
- if (flavor) {
- if (flavor > RTAX_MAX)
+ if (cfg->fc_mx) {
+ struct nlattr *nla;
+ int remaining;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla->nla_type;
+
+ if (type) {
+ if (type > RTAX_MAX)
goto err_inval;
- fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+ fi->fib_metrics[type - 1] = nla_get_u32(nla);
}
- attr = RTA_NEXT(attr, attrlen);
}
}
- if (rta->rta_prefsrc)
- memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
- if (rta->rta_mp) {
+ if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+ if (err != 0)
goto failure;
- if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+ if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
goto err_inval;
- if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+ if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
- if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+ if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
goto err_inval;
#endif
#else
@@ -760,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
#endif
} else {
struct fib_nh *nh = fi->fib_nh;
- if (rta->rta_oif)
- nh->nh_oif = *rta->rta_oif;
- if (rta->rta_gw)
- memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+ nh->nh_oif = cfg->fc_oif;
+ nh->nh_gw = cfg->fc_gw;
+ nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
- if (rta->rta_flow)
- memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+ nh->nh_tclassid = cfg->fc_flow;
#endif
- nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight = 1;
#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- fi->fib_mp_alg = mp_alg;
+ fi->fib_mp_alg = cfg->fc_mp_alg;
#endif
- if (fib_props[r->rtm_type].error) {
- if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+ if (fib_props[cfg->fc_type].error) {
+ if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
goto err_inval;
goto link_it;
}
- if (r->rtm_scope > RT_SCOPE_HOST)
+ if (cfg->fc_scope > RT_SCOPE_HOST)
goto err_inval;
- if (r->rtm_scope == RT_SCOPE_HOST) {
+ if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -800,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
goto failure;
} else {
change_nexthops(fi) {
- if ((err = fib_check_nh(r, fi, nh)) != 0)
+ if ((err = fib_check_nh(cfg, fi, nh)) != 0)
goto failure;
} endfor_nexthops(fi)
}
if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
- memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ fi->fib_prefsrc != cfg->fc_dst)
if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
@@ -822,7 +811,7 @@ link_it:
fi->fib_treeref++;
atomic_inc(&fi->fib_clntref);
- write_lock(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
@@ -841,24 +830,24 @@ link_it:
head = &fib_info_devhash[hash];
hlist_add_head(&nh->nh_hash, head);
} endfor_nexthops(fi)
- write_unlock(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
err = -EINVAL;
failure:
- *errp = err;
if (fi) {
fi->fib_dead = 1;
free_fib_info(fi);
}
- return NULL;
+
+ return ERR_PTR(err);
}
/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
- struct fib_result *res, __u32 zone, __u32 mask,
+ struct fib_result *res, __be32 zone, __be32 mask,
int prefixlen)
{
struct fib_alias *fa;
@@ -925,8 +914,7 @@ out_fill_res:
res->fi = fa->fa_info;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
res->netmask = mask;
- res->network = zone &
- (0xFFFFFFFF >> (32 - prefixlen));
+ res->network = zone & inet_make_mask(prefixlen);
#endif
atomic_inc(&res->fi->fib_clntref);
return 0;
@@ -934,225 +922,94 @@ out_fill_res:
/* Find appropriate source address to this destination */
-u32 __fib_res_prefsrc(struct fib_result *res)
+__be32 __fib_res_prefsrc(struct fib_result *res)
{
return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
- struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+ u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+ struct fib_info *fi, unsigned int flags)
{
+ struct nlmsghdr *nlh;
struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = dst_len;
rtm->rtm_src_len = 0;
rtm->rtm_tos = tos;
rtm->rtm_table = tb_id;
+ NLA_PUT_U32(skb, RTA_TABLE, tb_id);
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = scope;
- if (rtm->rtm_dst_len)
- RTA_PUT(skb, RTA_DST, 4, dst);
rtm->rtm_protocol = fi->fib_protocol;
+
+ if (rtm->rtm_dst_len)
+ NLA_PUT_BE32(skb, RTA_DST, dst);
+
if (fi->fib_priority)
- RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
-#ifdef CONFIG_NET_CLS_ROUTE
- if (fi->fib_nh[0].nh_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
-#endif
+ NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
+
if (fi->fib_prefsrc)
- RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+ NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
if (fi->fib_nhs == 1) {
if (fi->fib_nh->nh_gw)
- RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+ NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
if (fi->fib_nh->nh_oif)
- RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+ NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (fi->fib_nh[0].nh_tclassid)
+ NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
+#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
- struct rtnexthop *nhp;
- struct rtattr *mp_head;
- if (skb_tailroom(skb) <= RTA_SPACE(0))
- goto rtattr_failure;
- mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
-
- for_nexthops(fi) {
- if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
- goto rtattr_failure;
- nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
- nhp->rtnh_flags = nh->nh_flags & 0xFF;
- nhp->rtnh_hops = nh->nh_weight-1;
- nhp->rtnh_ifindex = nh->nh_oif;
- if (nh->nh_gw)
- RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
- nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
- } endfor_nexthops(fi);
- mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
- }
-#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
+ struct rtnexthop *rtnh;
+ struct nlattr *mp;
-#ifndef CONFIG_IP_NOSIOCRT
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (mp == NULL)
+ goto nla_put_failure;
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
- struct kern_rta *rta, struct rtentry *r)
-{
- int plen;
- u32 *ptr;
-
- memset(rtm, 0, sizeof(*rtm));
- memset(rta, 0, sizeof(*rta));
-
- if (r->rt_dst.sa_family != AF_INET)
- return -EAFNOSUPPORT;
-
- /* Check mask for validity:
- a) it must be contiguous.
- b) destination must have all host bits clear.
- c) if application forgot to set correct family (AF_INET),
- reject request unless it is absolutely clear i.e.
- both family and mask are zero.
- */
- plen = 32;
- ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
- if (!(r->rt_flags&RTF_HOST)) {
- u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
- if (r->rt_genmask.sa_family != AF_INET) {
- if (mask || r->rt_genmask.sa_family)
- return -EAFNOSUPPORT;
- }
- if (bad_mask(mask, *ptr))
- return -EINVAL;
- plen = inet_mask_len(mask);
- }
-
- nl->nlmsg_flags = NLM_F_REQUEST;
- nl->nlmsg_pid = 0;
- nl->nlmsg_seq = 0;
- nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
- if (cmd == SIOCDELRT) {
- nl->nlmsg_type = RTM_DELROUTE;
- nl->nlmsg_flags = 0;
- } else {
- nl->nlmsg_type = RTM_NEWROUTE;
- nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
- rtm->rtm_protocol = RTPROT_BOOT;
- }
+ for_nexthops(fi) {
+ rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+ if (rtnh == NULL)
+ goto nla_put_failure;
- rtm->rtm_dst_len = plen;
- rta->rta_dst = ptr;
+ rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+ rtnh->rtnh_hops = nh->nh_weight - 1;
+ rtnh->rtnh_ifindex = nh->nh_oif;
- if (r->rt_metric) {
- *(u32*)&r->rt_pad3 = r->rt_metric - 1;
- rta->rta_priority = (u32*)&r->rt_pad3;
- }
- if (r->rt_flags&RTF_REJECT) {
- rtm->rtm_scope = RT_SCOPE_HOST;
- rtm->rtm_type = RTN_UNREACHABLE;
- return 0;
- }
- rtm->rtm_scope = RT_SCOPE_NOWHERE;
- rtm->rtm_type = RTN_UNICAST;
-
- if (r->rt_dev) {
- char *colon;
- struct net_device *dev;
- char devname[IFNAMSIZ];
-
- if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
- return -EFAULT;
- devname[IFNAMSIZ-1] = 0;
- colon = strchr(devname, ':');
- if (colon)
- *colon = 0;
- dev = __dev_get_by_name(devname);
- if (!dev)
- return -ENODEV;
- rta->rta_oif = &dev->ifindex;
- if (colon) {
- struct in_ifaddr *ifa;
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
- if (!in_dev)
- return -ENODEV;
- *colon = ':';
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
- if (strcmp(ifa->ifa_label, devname) == 0)
- break;
- if (ifa == NULL)
- return -ENODEV;
- rta->rta_prefsrc = &ifa->ifa_local;
- }
- }
+ if (nh->nh_gw)
+ NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (nh->nh_tclassid)
+ NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
+#endif
+ /* length of rtnetlink header + attributes */
+ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
+ } endfor_nexthops(fi);
- ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
- if (r->rt_gateway.sa_family == AF_INET && *ptr) {
- rta->rta_gw = ptr;
- if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
- rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ nla_nest_end(skb, mp);
}
+#endif
+ return nlmsg_end(skb, nlh);
- if (cmd == SIOCDELRT)
- return 0;
-
- if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
- return -EINVAL;
-
- if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
- rtm->rtm_scope = RT_SCOPE_LINK;
-
- if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
- struct rtattr *rec;
- struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
- if (mx == NULL)
- return -ENOMEM;
- rta->rta_mx = mx;
- mx->rta_type = RTA_METRICS;
- mx->rta_len = RTA_LENGTH(0);
- if (r->rt_flags&RTF_MTU) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_ADVMSS;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
- }
- if (r->rt_flags&RTF_WINDOW) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_WINDOW;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_window;
- }
- if (r->rt_flags&RTF_IRTT) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_RTT;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
- }
- }
- return 0;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
-#endif
-
/*
Update FIB if:
- local address disappeared -> we must delete all the entries
@@ -1160,7 +1017,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
- device went down -> we must shutdown all nexthops going via it.
*/
-int fib_sync_down(u32 local, struct net_device *dev, int force)
+int fib_sync_down(__be32 local, struct net_device *dev, int force)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 95a639f2e3db..d17990ec724f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -52,7 +52,6 @@
#define VERSION "0.407"
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
@@ -1125,17 +1124,14 @@ err:
return fa_head;
}
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
struct fib_alias *fa, *new_fa;
struct list_head *fa_head = NULL;
struct fib_info *fi;
- int plen = r->rtm_dst_len;
- int type = r->rtm_type;
- u8 tos = r->rtm_tos;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
u32 key, mask;
int err;
struct leaf *l;
@@ -1143,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (plen > 32)
return -EINVAL;
- key = 0;
- if (rta->rta_dst)
- memcpy(&key, rta->rta_dst, 4);
-
- key = ntohl(key);
+ key = ntohl(cfg->fc_dst);
- pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+ pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
mask = ntohl(inet_make_mask(plen));
@@ -1158,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
key = key & mask;
- fi = fib_create_info(r, rta, nlhdr, &err);
-
- if (!fi)
+ fi = fib_create_info(cfg);
+ if (IS_ERR(fi)) {
+ err = PTR_ERR(fi);
goto err;
+ }
l = fib_find_node(t, key);
fa = NULL;
@@ -1186,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct fib_alias *fa_orig;
err = -EEXIST;
- if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+ if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
- if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+ if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
@@ -1201,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
fi_drop = fa->fa_info;
new_fa->fa_tos = fa->fa_tos;
new_fa->fa_info = fi;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
new_fa->fa_state &= ~FA_S_ACCESSED;
@@ -1225,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
- if (fa->fa_type == type &&
- fa->fa_scope == r->rtm_scope &&
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi) {
goto out;
}
}
- if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+ if (!(cfg->fc_nlflags & NLM_F_APPEND))
fa = fa_orig;
}
err = -ENOENT;
- if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+ if (!(cfg->fc_nlflags & NLM_F_CREATE))
goto out;
err = -ENOBUFS;
@@ -1245,16 +1238,16 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
* Insert new entry to the list.
*/
if (!fa_head) {
- fa_head = fib_insert_node(t, &err, key, plen);
err = 0;
+ fa_head = fib_insert_node(t, &err, key, plen);
if (err)
goto out_free_new_fa;
}
@@ -1263,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
(fa ? &fa->fa_list : fa_head));
rt_cache_flush(-1);
- rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo);
succeeded:
return 0;
@@ -1282,18 +1276,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l,
struct fib_result *res)
{
int err, i;
- t_key mask;
+ __be32 mask;
struct leaf_info *li;
struct hlist_head *hhead = &l->list;
struct hlist_node *node;
hlist_for_each_entry_rcu(li, node, hhead, hlist) {
i = li->plen;
- mask = ntohl(inet_make_mask(i));
- if (l->key != (key & mask))
+ mask = inet_make_mask(i);
+ if (l->key != (key & ntohl(mask)))
continue;
- if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
+ if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
*plen = i;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.semantic_match_passed++;
@@ -1549,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key)
return 1;
}
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
u32 key, mask;
- int plen = r->rtm_dst_len;
- u8 tos = r->rtm_tos;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
struct fib_alias *fa, *fa_to_delete;
struct list_head *fa_head;
struct leaf *l;
struct leaf_info *li;
-
if (plen > 32)
return -EINVAL;
- key = 0;
- if (rta->rta_dst)
- memcpy(&key, rta->rta_dst, 4);
-
- key = ntohl(key);
+ key = ntohl(cfg->fc_dst);
mask = ntohl(inet_make_mask(plen));
if (key & ~mask)
@@ -1599,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (fa->fa_tos != tos)
break;
- if ((!r->rtm_type ||
- fa->fa_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- fib_nh_match(r, nlhdr, rta, fi) == 0) {
+ if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+ (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+ fa->fa_scope == cfg->fc_scope) &&
+ (!cfg->fc_protocol ||
+ fi->fib_protocol == cfg->fc_protocol) &&
+ fib_nh_match(cfg, fi) == 0) {
fa_to_delete = fa;
break;
}
@@ -1615,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
return -ESRCH;
fa = fa_to_delete;
- rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+ rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo);
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
@@ -1847,9 +1834,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
int i, s_i;
struct fib_alias *fa;
- u32 xkey = htonl(key);
+ __be32 xkey = htonl(key);
- s_i = cb->args[3];
+ s_i = cb->args[4];
i = 0;
/* rcu_read_lock is hold by caller */
@@ -1867,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
tb->tb_id,
fa->fa_type,
fa->fa_scope,
- &xkey,
+ xkey,
plen,
fa->fa_tos,
fa->fa_info, 0) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
i++;
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -1887,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
struct list_head *fa_head;
struct leaf *l = NULL;
- s_h = cb->args[2];
+ s_h = cb->args[3];
for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
if (h < s_h)
continue;
if (h > s_h)
- memset(&cb->args[3], 0,
- sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
fa_head = get_fa_head(l, plen);
@@ -1905,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
continue;
if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
- cb->args[2] = h;
+ cb->args[3] = h;
return -1;
}
}
- cb->args[2] = h;
+ cb->args[3] = h;
return skb->len;
}
@@ -1918,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
int m, s_m;
struct trie *t = (struct trie *) tb->tb_data;
- s_m = cb->args[1];
+ s_m = cb->args[2];
rcu_read_lock();
for (m = 0; m <= 32; m++) {
if (m < s_m)
continue;
if (m > s_m)
- memset(&cb->args[2], 0,
- sizeof(cb->args) - 2*sizeof(cb->args[0]));
+ memset(&cb->args[3], 0,
+ sizeof(cb->args) - 3*sizeof(cb->args[0]));
if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
- cb->args[1] = m;
+ cb->args[2] = m;
goto out;
}
}
rcu_read_unlock();
- cb->args[1] = m;
+ cb->args[2] = m;
return skb->len;
out:
rcu_read_unlock();
@@ -1944,9 +1931,9 @@ out:
/* Fix more generic FIB names for init later */
#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
#else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
#endif
{
struct fib_table *tb;
@@ -2294,7 +2281,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
if (IS_TNODE(n)) {
struct tnode *tn = (struct tnode *) n;
- t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
+ __be32 prf = htonl(MASK_PFX(tn->key, tn->pos));
if (!NODE_PARENT(n)) {
if (iter->trie == trie_local)
@@ -2310,7 +2297,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
} else {
struct leaf *l = (struct leaf *) n;
int i;
- u32 val = ntohl(l->key);
+ __be32 val = htonl(l->key);
seq_indent(seq, iter->depth);
seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
@@ -2373,7 +2360,7 @@ static struct file_operations fib_trie_fops = {
.release = seq_release_private,
};
-static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
+static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
{
static unsigned type2flags[RTN_MAX + 1] = {
[7] = RTF_REJECT, [8] = RTF_REJECT,
@@ -2382,7 +2369,7 @@ static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
if (fi && fi->fib_nh->nh_gw)
flags |= RTF_GATEWAY;
- if (mask == 0xFFFFFFFF)
+ if (mask == htonl(0xFFFFFFFF))
flags |= RTF_HOST;
flags |= RTF_UP;
return flags;
@@ -2416,7 +2403,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
for (i=32; i>=0; i--) {
struct leaf_info *li = find_leaf_info(l, i);
struct fib_alias *fa;
- u32 mask, prefix;
+ __be32 mask, prefix;
if (!li)
continue;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 017900172f7d..b39a37a47545 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -64,7 +64,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/jiffies.h>
@@ -105,7 +104,7 @@ struct icmp_bxm {
struct {
struct icmphdr icmph;
- __u32 times[3];
+ __be32 times[3];
} data;
int head_len;
struct ip_options replyopts;
@@ -188,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
};
/* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
/* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
/*
* Configurable global rate limit.
@@ -206,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1;
* time exceeded (11), parameter problem (12)
*/
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
/*
* ICMP control array. This specifies what to do with each ICMP.
@@ -382,7 +381,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct inet_sock *inet = inet_sk(sk);
struct ipcm_cookie ipc;
struct rtable *rt = (struct rtable *)skb->dst;
- u32 daddr;
+ __be32 daddr;
if (ip_options_echo(&icmp_param->replyopts, skb))
return;
@@ -407,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
.saddr = rt->rt_spec_dst,
.tos = RT_TOS(skb->nh.iph->tos) } },
.proto = IPPROTO_ICMP };
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
@@ -430,14 +430,14 @@ out_unlock:
* MUST reply to only the first fragment.
*/
-void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
+void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct iphdr *iph;
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = (struct rtable *)skb_in->dst;
struct ipcm_cookie ipc;
- u32 saddr;
+ __be32 saddr;
u8 tos;
if (!rt)
@@ -561,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
}
}
};
+ security_skb_classify_flow(skb_in, &fl);
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
@@ -894,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb)
if (in_dev->ifa_list &&
IN_DEV_LOG_MARTIANS(in_dev) &&
IN_DEV_FORWARD(in_dev)) {
- u32 _mask, *mp;
+ __be32 _mask, *mp;
mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
BUG_ON(mp == NULL);
@@ -929,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (!(u16)csum_fold(skb->csum))
break;
/* fall through */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index ab680c851aa2..6eee71647b7c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -72,7 +72,6 @@
* Vinay Kulkarni
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -139,14 +138,14 @@
time_before(jiffies, (in_dev)->mr_v2_seen)))
static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
-static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr);
+static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
static void igmpv3_clear_delrec(struct in_device *in_dev);
static int sf_setstate(struct ip_mc_list *pmc);
static void sf_markstate(struct ip_mc_list *pmc);
#endif
static void ip_mc_clear_src(struct ip_mc_list *pmc);
-static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
- int sfcount, __u32 *psfsrc, int delta);
+static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ int sfcount, __be32 *psfsrc, int delta);
static void ip_ma_put(struct ip_mc_list *im)
{
@@ -427,7 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
first = 1;
psf_prev = NULL;
for (psf=*psf_list; psf; psf=psf_next) {
- u32 *psrc;
+ __be32 *psrc;
psf_next = psf->sf_next;
@@ -440,7 +439,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (isquery)
psf->sf_gsresp = 0;
- if (AVAILABLE(skb) < sizeof(u32) +
+ if (AVAILABLE(skb) < sizeof(__be32) +
first*sizeof(struct igmpv3_grec)) {
if (truncate && !first)
break; /* truncate these */
@@ -456,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
skb = add_grhead(skb, pmc, type, &pgr);
first = 0;
}
- psrc = (u32 *)skb_put(skb, sizeof(u32));
+ psrc = (__be32 *)skb_put(skb, sizeof(__be32));
*psrc = psf->sf_inaddr;
scount++; stotal++;
if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
@@ -631,8 +630,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
struct igmphdr *ih;
struct rtable *rt;
struct net_device *dev = in_dev->dev;
- u32 group = pmc ? pmc->multiaddr : 0;
- u32 dst;
+ __be32 group = pmc ? pmc->multiaddr : 0;
+ __be32 dst;
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
@@ -749,7 +748,7 @@ static void igmp_timer_expire(unsigned long data)
}
/* mark EXCLUDE-mode sources */
-static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
{
struct ip_sf_list *psf;
int i, scount;
@@ -776,7 +775,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
return 1;
}
-static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
{
struct ip_sf_list *psf;
int i, scount;
@@ -804,7 +803,7 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
return 1;
}
-static void igmp_heard_report(struct in_device *in_dev, u32 group)
+static void igmp_heard_report(struct in_device *in_dev, __be32 group)
{
struct ip_mc_list *im;
@@ -829,7 +828,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
struct igmphdr *ih = skb->h.igmph;
struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
struct ip_mc_list *im;
- u32 group = ih->group;
+ __be32 group = ih->group;
int max_delay;
int mark = 0;
@@ -863,7 +862,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
ih3 = (struct igmpv3_query *) skb->h.raw;
if (ih3->nsrcs) {
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
- + ntohs(ih3->nsrcs)*sizeof(__u32)))
+ + ntohs(ih3->nsrcs)*sizeof(__be32)))
return;
ih3 = (struct igmpv3_query *) skb->h.raw;
}
@@ -932,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
goto drop;
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (!(u16)csum_fold(skb->csum))
break;
/* fall through */
@@ -986,7 +985,7 @@ drop:
* Add a filter to a device
*/
-static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
+static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
{
char buf[MAX_ADDR_LEN];
struct net_device *dev = in_dev->dev;
@@ -1006,7 +1005,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
* Remove a filter from a device
*/
-static void ip_mc_filter_del(struct in_device *in_dev, u32 addr)
+static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
{
char buf[MAX_ADDR_LEN];
struct net_device *dev = in_dev->dev;
@@ -1029,10 +1028,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
* for deleted items allows change reports to use common code with
* non-deleted or query-response MCA's.
*/
- pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
+ pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
- memset(pmc, 0, sizeof(*pmc));
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
@@ -1057,7 +1055,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
spin_unlock_bh(&in_dev->mc_tomb_lock);
}
-static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
+static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
{
struct ip_mc_list *pmc, *pmc_prev;
struct ip_sf_list *psf, *psf_next;
@@ -1195,7 +1193,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* A socket has joined a multicast group on device dev.
*/
-void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
{
struct ip_mc_list *im;
@@ -1254,7 +1252,7 @@ out:
* A socket has left a multicast group on device dev
*/
-void ip_mc_dec_group(struct in_device *in_dev, u32 addr)
+void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
{
struct ip_mc_list *i, **ip;
@@ -1399,12 +1397,12 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
/*
* Join a socket to a group
*/
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
- __u32 *psfsrc)
+ __be32 *psfsrc)
{
struct ip_sf_list *psf, *psf_prev;
int rv = 0;
@@ -1452,8 +1450,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#define igmp_ifc_event(x) do { } while (0)
#endif
-static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
- int sfcount, __u32 *psfsrc, int delta)
+static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ int sfcount, __be32 *psfsrc, int delta)
{
struct ip_mc_list *pmc;
int changerec = 0;
@@ -1519,7 +1517,7 @@ out_unlock:
* Add multicast single-source filter to the interface list
*/
static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
- __u32 *psfsrc, int delta)
+ __be32 *psfsrc, int delta)
{
struct ip_sf_list *psf, *psf_prev;
@@ -1530,10 +1528,9 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
psf_prev = psf;
}
if (!psf) {
- psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
+ psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
if (!psf)
return -ENOBUFS;
- memset(psf, 0, sizeof(*psf));
psf->sf_inaddr = *psfsrc;
if (psf_prev) {
psf_prev->sf_next = psf;
@@ -1626,8 +1623,8 @@ static int sf_setstate(struct ip_mc_list *pmc)
/*
* Add multicast source filter list to the interface list
*/
-static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
- int sfcount, __u32 *psfsrc, int delta)
+static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ int sfcount, __be32 *psfsrc, int delta)
{
struct ip_mc_list *pmc;
int isexclude;
@@ -1720,7 +1717,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
{
int err;
- u32 addr = imr->imr_multiaddr.s_addr;
+ __be32 addr = imr->imr_multiaddr.s_addr;
struct ip_mc_socklist *iml=NULL, *i;
struct in_device *in_dev;
struct inet_sock *inet = inet_sk(sk);
@@ -1794,31 +1791,37 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *iml, **imlp;
struct in_device *in_dev;
- u32 group = imr->imr_multiaddr.s_addr;
+ __be32 group = imr->imr_multiaddr.s_addr;
u32 ifindex;
+ int ret = -EADDRNOTAVAIL;
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
- if (!in_dev) {
- rtnl_unlock();
- return -ENODEV;
- }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
- if (iml->multi.imr_multiaddr.s_addr == group &&
- iml->multi.imr_ifindex == ifindex) {
- (void) ip_mc_leave_src(sk, iml, in_dev);
+ if (iml->multi.imr_multiaddr.s_addr != group)
+ continue;
+ if (ifindex) {
+ if (iml->multi.imr_ifindex != ifindex)
+ continue;
+ } else if (imr->imr_address.s_addr && imr->imr_address.s_addr !=
+ iml->multi.imr_address.s_addr)
+ continue;
+
+ (void) ip_mc_leave_src(sk, iml, in_dev);
- *imlp = iml->next;
+ *imlp = iml->next;
+ if (in_dev)
ip_mc_dec_group(in_dev, group);
- rtnl_unlock();
- sock_kfree_s(sk, iml, sizeof(*iml));
- return 0;
- }
+ rtnl_unlock();
+ sock_kfree_s(sk, iml, sizeof(*iml));
+ return 0;
}
+ if (!in_dev)
+ ret = -ENODEV;
rtnl_unlock();
- return -EADDRNOTAVAIL;
+ return ret;
}
int ip_mc_source(int add, int omode, struct sock *sk, struct
@@ -1826,7 +1829,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
{
int err;
struct ip_mreqn imr;
- u32 addr = mreqs->imr_multiaddr;
+ __be32 addr = mreqs->imr_multiaddr;
struct ip_mc_socklist *pmc;
struct in_device *in_dev = NULL;
struct inet_sock *inet = inet_sk(sk);
@@ -1880,7 +1883,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
rv = !0;
for (i=0; i<psl->sl_count; i++) {
rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
- sizeof(__u32));
+ sizeof(__be32));
if (rv == 0)
break;
}
@@ -1932,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i=0; i<psl->sl_count; i++) {
rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
- sizeof(__u32));
+ sizeof(__be32));
if (rv == 0)
break;
}
@@ -1957,7 +1960,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
{
int err = 0;
struct ip_mreqn imr;
- u32 addr = msf->imsf_multiaddr;
+ __be32 addr = msf->imsf_multiaddr;
struct ip_mc_socklist *pmc;
struct in_device *in_dev;
struct inet_sock *inet = inet_sk(sk);
@@ -2041,7 +2044,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
{
int err, len, count, copycount;
struct ip_mreqn imr;
- u32 addr = msf->imsf_multiaddr;
+ __be32 addr = msf->imsf_multiaddr;
struct ip_mc_socklist *pmc;
struct in_device *in_dev;
struct inet_sock *inet = inet_sk(sk);
@@ -2100,7 +2103,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
{
int err, i, count, copycount;
struct sockaddr_in *psin;
- u32 addr;
+ __be32 addr;
struct ip_mc_socklist *pmc;
struct inet_sock *inet = inet_sk(sk);
struct ip_sf_socklist *psl;
@@ -2153,7 +2156,7 @@ done:
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
-int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
@@ -2202,18 +2205,18 @@ void ip_mc_drop_socket(struct sock *sk)
struct in_device *in_dev;
inet->mc_list = iml->next;
- if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) {
- (void) ip_mc_leave_src(sk, iml, in_dev);
+ in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+ (void) ip_mc_leave_src(sk, iml, in_dev);
+ if (in_dev != NULL) {
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
in_dev_put(in_dev);
}
sock_kfree_s(sk, iml, sizeof(*iml));
-
}
rtnl_unlock();
}
-int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto)
+int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
{
struct ip_mc_list *im;
struct ip_sf_list *psf;
@@ -2381,7 +2384,7 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -2391,7 +2394,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
@@ -2556,7 +2558,7 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -2566,7 +2568,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 9a01bb81f8bf..96bbe2a0aa1b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -13,7 +13,6 @@
* 2 of the License, or(at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/jhash.h>
@@ -40,7 +39,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 };
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb)
{
- const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
+ const __be32 sk_rcv_saddr = inet_rcv_saddr(sk);
struct sock *sk2;
struct hlist_node *node;
int reuse = sk->sk_reuse;
@@ -53,7 +52,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) {
- const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+ const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
if (!sk2_rcv_saddr || !sk_rcv_saddr ||
sk2_rcv_saddr == sk_rcv_saddr)
break;
@@ -328,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
{ .sport = inet_sk(sk)->sport,
.dport = ireq->rmt_port } } };
+ security_req_classify_flow(req, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
@@ -342,10 +342,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
EXPORT_SYMBOL_GPL(inet_csk_route_req);
-static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
+static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
const u32 rnd, const u16 synq_hsize)
{
- return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1);
+ return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -356,8 +356,8 @@ static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
struct request_sock *inet_csk_search_req(const struct sock *sk,
struct request_sock ***prevp,
- const __u16 rport, const __u32 raddr,
- const __u32 laddr)
+ const __be16 rport, const __be32 raddr,
+ const __be32 laddr)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+ security_inet_csk_clone(newsk, req);
}
return newsk;
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 457db99c76df..77761ac4f7bb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -11,7 +11,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
@@ -37,8 +36,8 @@
static const struct inet_diag_handler **inet_diag_table;
struct inet_diag_entry {
- u32 *saddr;
- u32 *daddr;
+ __be32 *saddr;
+ __be32 *daddr;
u16 sport;
u16 dport;
u16 family;
@@ -295,7 +294,7 @@ out:
return err;
}
-static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
+static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
int words = bits >> 5;
@@ -306,8 +305,8 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
return 0;
}
if (bits) {
- __u32 w1, w2;
- __u32 mask;
+ __be32 w1, w2;
+ __be32 mask;
w1 = a1[words];
w2 = a2[words];
@@ -353,7 +352,7 @@ static int inet_diag_bc_run(const void *bc, int len,
case INET_DIAG_BC_S_COND:
case INET_DIAG_BC_D_COND: {
struct inet_diag_hostcond *cond;
- u32 *addr;
+ __be32 *addr;
cond = (struct inet_diag_hostcond *)(op + 1);
if (cond->port != -1 &&
@@ -910,11 +909,10 @@ static int __init inet_diag_init(void)
sizeof(struct inet_diag_handler *));
int err = -ENOMEM;
- inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL);
+ inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
if (!inet_diag_table)
goto out;
- memset(inet_diag_table, 0, inet_diag_table_size);
idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
THIS_MODULE);
if (idiagnl == NULL)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ee9b5515b9ae..244c4f445c7d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -13,7 +13,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
@@ -125,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
* remote address for the connection. So always assume those are both
* wildcarded during the search since they can never be otherwise.
*/
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
- const unsigned short hnum, const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+ const __be32 daddr,
+ const unsigned short hnum,
+ const int dif)
{
struct sock *result = NULL, *sk;
const struct hlist_node *node;
@@ -136,7 +137,7 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
const struct inet_sock *inet = inet_sk(sk);
if (inet->num == hnum && !ipv6_only_sock(sk)) {
- const __u32 rcv_saddr = inet->rcv_saddr;
+ const __be32 rcv_saddr = inet->rcv_saddr;
int score = sk->sk_family == PF_INET ? 1 : 0;
if (rcv_saddr) {
@@ -160,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
return result;
}
+/* Optimize the common listener case. */
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ const __be32 daddr, const unsigned short hnum,
+ const int dif)
+{
+ struct sock *sk = NULL;
+ const struct hlist_head *head;
+
+ read_lock(&hashinfo->lhash_lock);
+ head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ if (!hlist_empty(head)) {
+ const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
+ (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ !sk->sk_bound_dev_if)
+ goto sherry_cache;
+ sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+ }
+ if (sk) {
+sherry_cache:
+ sock_hold(sk);
+ }
+ read_unlock(&hashinfo->lhash_lock);
+ return sk;
+}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
/* called with local bh disabled */
@@ -169,11 +197,11 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
- u32 daddr = inet->rcv_saddr;
- u32 saddr = inet->daddr;
+ __be32 daddr = inet->rcv_saddr;
+ __be32 saddr = inet->daddr;
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+ const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
struct sock *sk2;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 417f126c749e..cdd805344c61 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -8,7 +8,6 @@
* From code orinally in TCP
*/
-#include <linux/config.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2160874ce7aa..2b1a54b59c48 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty;
static DEFINE_RWLOCK(peer_pool_lock);
#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
-static volatile int peer_total;
+static int peer_total;
/* Exported for sysctl_net_ipv4. */
int inet_peer_threshold = 65536 + 128; /* start to throw entries more
* aggressively at this stage */
@@ -126,12 +126,9 @@ void __init inet_initpeers(void)
peer_cachep = kmem_cache_create("inet_peer_cache",
sizeof(struct inet_peer),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!peer_cachep)
- panic("cannot create inet_peer_cache");
-
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
@@ -166,7 +163,7 @@ static void unlink_from_unused(struct inet_peer *p)
for (u = peer_root; u != peer_avl_empty; ) { \
if (daddr == u->v4daddr) \
break; \
- if (daddr < u->v4daddr) \
+ if ((__force __u32)daddr < (__force __u32)u->v4daddr) \
v = &u->avl_left; \
else \
v = &u->avl_right; \
@@ -371,7 +368,7 @@ static int cleanup_once(unsigned long ttl)
}
/* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(__u32 daddr, int create)
+struct inet_peer *inet_getpeer(__be32 daddr, int create)
{
struct inet_peer *p, *n;
struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0bb529ab70..a22d11d2911c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -21,7 +21,6 @@
* Mike McLagan : Routing by source
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/sched.h>
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index da734c439179..74046efdf875 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -23,7 +23,6 @@
*/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -55,15 +54,15 @@
* even the most extreme cases without allowing an attacker to measurably
* harm machine performance.
*/
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
/* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
*/
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
struct ipfrag_skb_cb
{
@@ -78,9 +77,9 @@ struct ipq {
struct hlist_node list;
struct list_head lru_list; /* lru list member */
u32 user;
- u32 saddr;
- u32 daddr;
- u16 id;
+ __be32 saddr;
+ __be32 daddr;
+ __be16 id;
u8 protocol;
u8 last_in;
#define COMPLETE 4
@@ -124,14 +123,15 @@ static __inline__ void ipq_unlink(struct ipq *ipq)
write_unlock(&ipfrag_lock);
}
-static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
+static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
- return jhash_3words((u32)id << 16 | prot, saddr, daddr,
+ return jhash_3words((__force u32)id << 16 | prot,
+ (__force u32)saddr, (__force u32)daddr,
ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
}
static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
static void ipfrag_secret_rebuild(unsigned long dummy)
{
@@ -388,8 +388,8 @@ out_nomem:
static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{
__be16 id = iph->id;
- __u32 saddr = iph->saddr;
- __u32 daddr = iph->daddr;
+ __be32 saddr = iph->saddr;
+ __be32 daddr = iph->daddr;
__u8 protocol = iph->protocol;
unsigned int hash;
struct ipq *qp;
@@ -666,7 +666,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_HW)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ab99bebdcdc8..f5fba051df3d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -11,7 +11,6 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -394,7 +393,8 @@ out:
int code = skb->h.icmph->code;
int rel_type = 0;
int rel_code = 0;
- int rel_info = 0;
+ __be32 rel_info = 0;
+ __u32 n = 0;
u16 flags;
int grehlen = (iph->ihl<<2) + 4;
struct sk_buff *skb2;
@@ -423,14 +423,16 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+ n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ if (n < (iph->ihl<<2))
return;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMP_PARAMETERPROB;
- rel_info = skb->h.icmph->un.gateway - grehlen;
+ n -= grehlen;
+ rel_info = htonl(n << 24);
break;
case ICMP_DEST_UNREACH:
@@ -441,13 +443,14 @@ out:
return;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- rel_info = ntohs(skb->h.icmph->un.frag.mtu);
- if (rel_info < grehlen+68)
+ n = ntohs(skb->h.icmph->un.frag.mtu);
+ if (n < grehlen+68)
return;
- rel_info -= grehlen;
+ n -= grehlen;
/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (rel_info > ntohs(eiph->tot_len))
+ if (n > ntohs(eiph->tot_len))
return;
+ rel_info = htonl(n);
break;
default:
/* All others are translated to HOST_UNREACH.
@@ -509,12 +512,11 @@ out:
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (rel_info > dst_mtu(skb2->dst)) {
+ if (n > dst_mtu(skb2->dst)) {
kfree_skb(skb2);
return;
}
- skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
- rel_info = htonl(rel_info);
+ skb2->dst->ops->update_pmtu(skb2->dst, n);
} else if (type == ICMP_TIME_EXCEEDED) {
struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
@@ -577,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if (flags&GRE_CSUM) {
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
csum = (u16)csum_fold(skb->csum);
if (!csum)
break;
@@ -585,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb)
case CHECKSUM_NONE:
skb->csum = 0;
csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_COMPLETE;
}
offset += 4;
}
@@ -618,7 +620,6 @@ static int ipgre_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = __pskb_pull(skb, offset);
skb_postpull_rcsum(skb, skb->h.raw, offset);
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c9026dbf4c93..212734ca238f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -121,7 +121,6 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
-#include <linux/config.h>
#include <linux/net.h>
#include <linux/socket.h>
@@ -429,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
}
+ /* Remove any debris in the socket control block */
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+
return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index cbcae6544622..8dabbfc31267 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>
+#include <net/cipso_ipv4.h>
/*
* Write options to IP header, record destination address to
@@ -37,7 +38,7 @@
*/
void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
- u32 daddr, struct rtable *rt, int is_frag)
+ __be32 daddr, struct rtable *rt, int is_frag)
{
unsigned char * iph = skb->nh.raw;
@@ -56,7 +57,7 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
if (opt->ts_needtime) {
struct timeval tv;
- __u32 midtime;
+ __be32 midtime;
do_gettimeofday(&tv);
midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
@@ -90,7 +91,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
unsigned char *sptr, *dptr;
int soffset, doffset;
int optlen;
- u32 daddr;
+ __be32 daddr;
memset(dopt, 0, sizeof(struct ip_options));
@@ -147,7 +148,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
dopt->ts_needtime = 0;
if (soffset + 8 <= optlen) {
- __u32 addr;
+ __be32 addr;
memcpy(&addr, sptr+soffset-1, 4);
if (inet_addr_type(addr) != RTN_LOCAL) {
@@ -164,7 +165,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
}
if (sopt->srr) {
unsigned char * start = sptr+sopt->srr;
- u32 faddr;
+ __be32 faddr;
optlen = start[1];
soffset = start[2];
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
dopt->is_strictroute = sopt->is_strictroute;
}
}
+ if (sopt->cipso) {
+ optlen = sptr[sopt->cipso+1];
+ dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+ memcpy(dptr, sptr+sopt->cipso, optlen);
+ dptr += optlen;
+ dopt->optlen += optlen;
+ }
while (dopt->optlen & 3) {
*dptr++ = IPOPT_END;
dopt->optlen++;
@@ -256,7 +264,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (!opt) {
opt = &(IPCB(skb)->opt);
- memset(opt, 0, sizeof(struct ip_options));
iph = skb->nh.raw;
opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
optptr = iph + sizeof(struct iphdr);
@@ -355,7 +362,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
goto error;
}
if (optptr[2] <= optlen) {
- __u32 * timeptr = NULL;
+ __be32 *timeptr = NULL;
if (optptr[2]+3 > optptr[1]) {
pp_ptr = optptr + 2;
goto error;
@@ -364,7 +371,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
case IPOPT_TS_TSONLY:
opt->ts = optptr - iph;
if (skb)
- timeptr = (__u32*)&optptr[optptr[2]-1];
+ timeptr = (__be32*)&optptr[optptr[2]-1];
opt->ts_needtime = 1;
optptr[2] += 4;
break;
@@ -376,7 +383,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
opt->ts = optptr - iph;
if (skb) {
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
- timeptr = (__u32*)&optptr[optptr[2]+3];
+ timeptr = (__be32*)&optptr[optptr[2]+3];
}
opt->ts_needaddr = 1;
opt->ts_needtime = 1;
@@ -389,12 +396,12 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
}
opt->ts = optptr - iph;
{
- u32 addr;
+ __be32 addr;
memcpy(&addr, &optptr[optptr[2]-1], 4);
if (inet_addr_type(addr) == RTN_UNICAST)
break;
if (skb)
- timeptr = (__u32*)&optptr[optptr[2]+3];
+ timeptr = (__be32*)&optptr[optptr[2]+3];
}
opt->ts_needtime = 1;
optptr[2] += 8;
@@ -408,10 +415,10 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
}
if (timeptr) {
struct timeval tv;
- __u32 midtime;
+ __be32 midtime;
do_gettimeofday(&tv);
midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
- memcpy(timeptr, &midtime, sizeof(__u32));
+ memcpy(timeptr, &midtime, sizeof(__be32));
opt->is_changed = 1;
}
} else {
@@ -435,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (optptr[2] == 0 && optptr[3] == 0)
opt->router_alert = optptr - iph;
break;
+ case IPOPT_CIPSO:
+ if (opt->cipso) {
+ pp_ptr = optptr;
+ goto error;
+ }
+ opt->cipso = optptr - iph;
+ if (cipso_v4_validate(&optptr)) {
+ pp_ptr = optptr;
+ goto error;
+ }
+ break;
case IPOPT_SEC:
case IPOPT_SID:
default:
@@ -507,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp,
opt->__data[optlen++] = IPOPT_END;
opt->optlen = optlen;
opt->is_data = 1;
- opt->is_setbyuser = 1;
if (optlen && ip_options_compile(opt, NULL)) {
kfree(opt);
return -EINVAL;
@@ -590,7 +607,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
int srrspace, srrptr;
- u32 nexthop;
+ __be32 nexthop;
struct iphdr *iph = skb->nh.iph;
unsigned char * optptr = skb->nh.raw + opt->srr;
struct rtable *rt = (struct rtable*)skb->dst;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7624fd1d8f9f..fc195a44fc2e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -53,7 +53,6 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
-#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -84,7 +83,7 @@
#include <linux/netlink.h>
#include <linux/tcp.h>
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
@@ -119,7 +118,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
*
*/
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
- u32 saddr, u32 daddr, struct ip_options *opt)
+ __be32 saddr, __be32 daddr, struct ip_options *opt)
{
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)skb->dst;
@@ -210,7 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
+ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -307,7 +306,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
/* Make sure we can route this packet. */
rt = (struct rtable *)__sk_dst_check(sk, 0);
if (rt == NULL) {
- u32 daddr;
+ __be32 daddr;
/* Use correct destination address if we have options. */
daddr = inet->daddr;
@@ -329,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
* keep trying until route appears or the connection times
* itself out.
*/
+ security_sk_classify_flow(sk, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0))
goto no_route;
}
@@ -426,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
int ptr;
struct net_device *dev;
struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
+ unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
__be16 not_last_frag;
struct rtable *rt = (struct rtable*)skb->dst;
@@ -441,6 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = skb->nh.iph;
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->u.dst)));
kfree_skb(skb);
@@ -527,6 +528,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
err = output(skb);
+ if (!err)
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -553,14 +556,13 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = raw + hlen; /* Where to start from */
-#ifdef CONFIG_BRIDGE_NETFILTER
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header */
- ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
- mtu -= nf_bridge_pad(skb);
-#else
- ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+ * we need to make room for the encapsulating header
+ */
+ pad = nf_bridge_pad(skb);
+ ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+ mtu -= pad;
+
/*
* Fragment the datagram.
*/
@@ -650,9 +652,6 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
-
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
iph->tot_len = htons(len + hlen);
ip_send_check(iph);
@@ -660,6 +659,8 @@ slow_path:
err = output(skb2);
if (err)
goto fail;
+
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
@@ -678,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
{
struct iovec *iov = from;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (memcpy_fromiovecend(to, iov, offset, len) < 0)
return -EFAULT;
} else {
@@ -734,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->h.raw = skb->data + fragheaderlen;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
sk->sk_sndmsg_off = 0;
}
@@ -744,7 +745,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
if (!err) {
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
__skb_queue_tail(&sk->sk_write_queue, skb);
return 0;
@@ -842,7 +843,7 @@ int ip_append_data(struct sock *sk,
length + fragheaderlen <= mtu &&
rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
!exthdrlen)
- csummode = CHECKSUM_HW;
+ csummode = CHECKSUM_PARTIAL;
inet->cork.length += length;
if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -947,7 +948,7 @@ alloc_new_skb:
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
- skb_trim(skb_prev, maxfraglen);
+ pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
@@ -1089,14 +1090,14 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if ((sk->sk_protocol == IPPROTO_UDP) &&
(rt->u.dst.dev->features & NETIF_F_UFO)) {
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
while (size > 0) {
int i;
- if (skb_shinfo(skb)->gso_size)
+ if (skb_is_gso(skb))
len = size;
else {
@@ -1142,7 +1143,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
data, fraggap, 0);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
- skb_trim(skb_prev, maxfraglen);
+ pskb_trim_unique(skb_prev, maxfraglen);
}
/*
@@ -1339,7 +1340,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
char data[40];
} replyopts;
struct ipcm_cookie ipc;
- u32 daddr;
+ __be32 daddr;
struct rtable *rt = (struct rtable*)skb->dst;
if (ip_options_echo(&replyopts.opt, skb))
@@ -1365,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
{ .sport = skb->h.th->dest,
.dport = skb->h.th->source } },
.proto = sk->sk_protocol };
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
return;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 12e0bf19f24a..4b132953bcc2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -17,7 +17,6 @@
* Mike McLagan : Routing by source
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -113,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
char *secdata;
- u32 seclen;
+ u32 seclen, secid;
int err;
- err = security_socket_getpeersec_dgram(skb, &secdata, &seclen);
+ err = security_socket_getpeersec_dgram(NULL, skb, &secid);
+ if (err)
+ return;
+
+ err = security_secid_to_secctx(secid, &secdata, &seclen);
if (err)
return;
put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
+ security_release_secctx(secdata, seclen);
}
@@ -250,7 +254,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s
}
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
- u16 port, u32 info, u8 *payload)
+ __be16 port, u32 info, u8 *payload)
{
struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
@@ -279,7 +283,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
kfree_skb(skb);
}
-void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info)
+void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 3ed8b57a1002..2017d36024d4 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -13,7 +13,6 @@
* - Compression stats.
* - Adaptive compression.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/scatterlist.h>
#include <asm/semaphore.h>
@@ -33,7 +32,7 @@
struct ipcomp_tfms {
struct list_head list;
- struct crypto_tfm **tfms;
+ struct crypto_comp **tfms;
int users;
};
@@ -47,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
int err, plen, dlen;
struct ipcomp_data *ipcd = x->data;
u8 *start, *scratch;
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
int cpu;
plen = skb->len;
@@ -71,7 +70,8 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
if (err)
goto out;
- skb_put(skb, dlen - plen);
+ skb->truesize += dlen - plen;
+ __skb_put(skb, dlen - plen);
memcpy(skb->data, scratch, dlen);
out:
put_cpu();
@@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = skb->nh.iph;
struct ipcomp_data *ipcd = x->data;
u8 *start, *scratch;
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
int cpu;
ihlen = iph->ihl * 4;
@@ -176,14 +176,14 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
out_ok:
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
ip_send_check(iph);
return 0;
}
static void ipcomp4_err(struct sk_buff *skb, u32 info)
{
- u32 spi;
+ __be32 spi;
struct iphdr *iph = (struct iphdr *)skb->data;
struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->id.daddr.a4 = x->id.daddr.a4;
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET;
- t->props.mode = 1;
+ t->props.mode = XFRM_MODE_TUNNEL;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
@@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void)
return scratches;
}
-static void ipcomp_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
{
struct ipcomp_tfms *pos;
int cpu;
@@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms)
return;
for_each_possible_cpu(cpu) {
- struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
- crypto_free_tfm(tfm);
+ struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+ crypto_free_comp(tfm);
}
free_percpu(tfms);
}
-static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
{
struct ipcomp_tfms *pos;
- struct crypto_tfm **tfms;
+ struct crypto_comp **tfms;
int cpu;
/* This can be any valid CPU ID so we don't need locking. */
cpu = raw_smp_processor_id();
list_for_each_entry(pos, &ipcomp_tfms_list, list) {
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
tfms = pos->tfms;
tfm = *per_cpu_ptr(tfms, cpu);
- if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+ if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
return tfms;
}
@@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
INIT_LIST_HEAD(&pos->list);
list_add(&pos->list, &ipcomp_tfms_list);
- pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+ pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
if (!tfms)
goto error;
for_each_possible_cpu(cpu) {
- struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+ struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+ CRYPTO_ALG_ASYNC);
if (!tfm)
goto error;
*per_cpu_ptr(tfms, cpu) = tfm;
@@ -410,13 +411,12 @@ static int ipcomp_init_state(struct xfrm_state *x)
goto out;
err = -ENOMEM;
- ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+ ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
if (!ipcd)
goto out;
- memset(ipcd, 0, sizeof(*ipcd));
x->props.header_len = 0;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
mutex_lock(&ipcomp_resource_mutex);
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
goto error;
mutex_unlock(&ipcomp_resource_mutex);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = ipcomp_tunnel_attach(x);
if (err)
goto error_tunnel;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f18ef6..1fbb38415b19 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
* -- Josef Siemes <jsiemes@web.de>, Aug 2002
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ea398ee43f28..0c4556529228 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -94,7 +94,6 @@
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -342,7 +341,8 @@ out:
int code = skb->h.icmph->code;
int rel_type = 0;
int rel_code = 0;
- int rel_info = 0;
+ __be32 rel_info = 0;
+ __u32 n = 0;
struct sk_buff *skb2;
struct flowi fl;
struct rtable *rt;
@@ -355,14 +355,15 @@ out:
default:
return 0;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < hlen)
+ n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ if (n < hlen)
return 0;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMP_PARAMETERPROB;
- rel_info = skb->h.icmph->un.gateway - hlen;
+ rel_info = htonl((n - hlen) << 24);
break;
case ICMP_DEST_UNREACH:
@@ -373,13 +374,14 @@ out:
return 0;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- rel_info = ntohs(skb->h.icmph->un.frag.mtu);
- if (rel_info < hlen+68)
+ n = ntohs(skb->h.icmph->un.frag.mtu);
+ if (n < hlen+68)
return 0;
- rel_info -= hlen;
+ n -= hlen;
/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (rel_info > ntohs(eiph->tot_len))
+ if (n > ntohs(eiph->tot_len))
return 0;
+ rel_info = htonl(n);
break;
default:
/* All others are translated to HOST_UNREACH.
@@ -441,12 +443,11 @@ out:
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (rel_info > dst_mtu(skb2->dst)) {
+ if (n > dst_mtu(skb2->dst)) {
kfree_skb(skb2);
return 0;
}
- skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
- rel_info = htonl(rel_info);
+ skb2->dst->ops->update_pmtu(skb2->dst, n);
} else if (type == ICMP_TIME_EXCEEDED) {
struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
@@ -488,7 +489,6 @@ static int ipip_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = skb->data;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 717ab7d6d7b6..97cfa97c8abb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,7 +28,6 @@
*
*/
-#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
@@ -313,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
e = NLMSG_DATA(nlh);
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
- netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+ rtnl_unicast(skb, NETLINK_CB(skb).pid);
} else
kfree_skb(skb);
}
@@ -462,7 +462,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
return 0;
}
-static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
+static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
int line=MFC_HASH(mcastgrp,origin);
struct mfc_cache *c;
@@ -513,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (skb->nh.iph->version == 0) {
- int err;
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -526,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
e->error = -EMSGSIZE;
memset(&e->msg, 0, sizeof(e->msg));
}
- err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+ rtnl_unicast(skb, NETLINK_CB(skb).pid);
} else
ip_mr_forward(skb, c, 0);
}
@@ -1097,7 +1097,7 @@ static struct notifier_block ip_mr_notifier={
* important for multicast video.
*/
-static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
+static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
@@ -1462,7 +1462,6 @@ int pim_rcv_v1(struct sk_buff * skb)
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
@@ -1518,7 +1517,6 @@ static int pim_rcv(struct sk_buff * skb)
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
@@ -1581,6 +1579,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
if (cache==NULL) {
+ struct sk_buff *skb2;
struct net_device *dev;
int vif;
@@ -1594,12 +1593,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
read_unlock(&mrt_lock);
return -ENODEV;
}
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
- skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
- skb->nh.iph->saddr = rt->rt_src;
- skb->nh.iph->daddr = rt->rt_dst;
- skb->nh.iph->version = 0;
- err = ipmr_cache_unresolved(vif, skb);
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ read_unlock(&mrt_lock);
+ return -ENOMEM;
+ }
+
+ skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
+ skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
+ skb2->nh.iph->saddr = rt->rt_src;
+ skb2->nh.iph->daddr = rt->rt_dst;
+ skb2->nh.iph->version = 0;
+ err = ipmr_cache_unresolved(vif, skb2);
read_unlock(&mrt_lock);
return err;
}
@@ -1895,11 +1900,8 @@ void __init ip_mr_init(void)
{
mrt_cachep = kmem_cache_create("ip_mrt_cache",
sizeof(struct mfc_cache),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!mrt_cachep)
- panic("cannot allocate ip_mrt_cache");
-
init_timer(&ipmr_expire_timer);
ipmr_expire_timer.function=ipmr_expire_process;
register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 87b83813cf2c..8832eb517d52 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -115,9 +115,9 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port)
+static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
{
- return jhash_3words(addr, port, proto, ip_vs_conn_rnd)
+ return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
& IP_VS_CONN_TAB_MASK;
}
@@ -188,7 +188,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
* d_addr, d_port: pkt dest address (load balancer)
*/
static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
@@ -215,7 +215,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
}
struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
{
struct ip_vs_conn *cp;
@@ -234,7 +234,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
@@ -274,7 +274,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
* d_addr, d_port: pkt dest address (foreign host)
*/
struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp, *ret=NULL;
@@ -324,7 +324,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
/*
* Fill a no_client_port connection with a client port number
*/
-void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport)
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
spin_lock(&cp->lock);
@@ -508,10 +508,10 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
/*
* Invalidate the connection template
*/
- if (ct->vport != 65535) {
+ if (ct->vport != htons(0xffff)) {
if (ip_vs_conn_unhash(ct)) {
- ct->dport = 65535;
- ct->vport = 65535;
+ ct->dport = htons(0xffff);
+ ct->vport = htons(0xffff);
ct->cport = 0;
ip_vs_conn_hash(ct);
}
@@ -596,8 +596,8 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
- __u32 daddr, __u16 dport, unsigned flags,
+ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
+ __be32 daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest)
{
struct ip_vs_conn *cp;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 3f47ad8e1cad..6dee03935f78 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
const struct sk_buff *skb,
- __u16 ports[2])
+ __be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = skb->nh.iph;
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
- __u16 dport; /* destination port to forward */
- __u32 snet; /* source network of the client, after masking */
+ __be16 dport; /* destination port to forward */
+ __be32 snet; /* source network of the client, after masking */
/* Mask saddr with the netmask to adjust template granularity */
snet = iph->saddr & svc->netmask;
@@ -383,7 +383,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = skb->nh.iph;
struct ip_vs_dest *dest;
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
@@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
struct iphdr *iph = skb->nh.iph;
pptr = skb_header_pointer(skb, iph->ihl*4,
@@ -576,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
- __u16 *ports = (void *)ciph + ciph->ihl*4;
+ __be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout)
ports[1] = cp->vport;
@@ -775,7 +775,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) {
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, ihl,
sizeof(_ports), _ports);
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f28ec6882162..f261616e4602 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -283,7 +283,7 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
* Returns hash value for virtual service
*/
static __inline__ unsigned
-ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
+ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
{
register unsigned porth = ntohs(port);
@@ -365,7 +365,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
* Get service by {proto,addr,port} in the service table.
*/
static __inline__ struct ip_vs_service *
-__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
+__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
@@ -410,7 +410,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
}
struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
+ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
{
struct ip_vs_service *svc;
@@ -480,7 +480,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
/*
* Returns hash value for real service
*/
-static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
+static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
{
register unsigned porth = ntohs(port);
@@ -531,7 +531,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
+ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
{
unsigned hash;
struct ip_vs_dest *dest;
@@ -562,7 +562,7 @@ ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
* Lookup destination by {addr,port} in the given service
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
{
struct ip_vs_dest *dest;
@@ -591,7 +591,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
{
struct ip_vs_dest *dest, *nxt;
@@ -735,12 +735,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
- dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+ dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
if (dest == NULL) {
IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
return -ENOMEM;
}
- memset(dest, 0, sizeof(struct ip_vs_dest));
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
@@ -774,8 +773,8 @@ static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
- __u32 daddr = udest->addr;
- __u16 dport = udest->port;
+ __be32 daddr = udest->addr;
+ __be16 dport = udest->port;
int ret;
EnterFunction(2);
@@ -880,8 +879,8 @@ static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
- __u32 daddr = udest->addr;
- __u16 dport = udest->port;
+ __be32 daddr = udest->addr;
+ __be16 dport = udest->port;
EnterFunction(2);
@@ -992,8 +991,8 @@ static int
ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
- __u32 daddr = udest->addr;
- __u16 dport = udest->port;
+ __be32 daddr = udest->addr;
+ __be16 dport = udest->port;
EnterFunction(2);
@@ -1050,14 +1049,12 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
goto out_mod_dec;
}
- svc = (struct ip_vs_service *)
- kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+ svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
if (svc == NULL) {
IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
ret = -ENOMEM;
goto out_err;
}
- memset(svc, 0, sizeof(struct ip_vs_service));
/* I'm the first user of the service */
atomic_set(&svc->usecnt, 1);
@@ -1797,7 +1794,7 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1808,7 +1805,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 9fee19c4c617..502111fba872 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -66,7 +66,7 @@ struct ip_vs_dh_bucket {
/*
* Returns hash value for IPVS DH entry
*/
-static inline unsigned ip_vs_dh_hashkey(__u32 addr)
+static inline unsigned ip_vs_dh_hashkey(__be32 addr)
{
return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
}
@@ -76,7 +76,7 @@ static inline unsigned ip_vs_dh_hashkey(__u32 addr)
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr)
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
{
return (tbl[ip_vs_dh_hashkey(addr)]).dest;
}
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index c453e1e57f4b..7d68b80c4c19 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,7 +13,6 @@
* Changes:
*
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
@@ -124,11 +123,10 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est;
- est = kmalloc(sizeof(*est), GFP_KERNEL);
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOMEM;
- memset(est, 0, sizeof(*est));
est->stats = stats;
est->last_conns = stats->conns;
est->cps = stats->cps<<10;
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index a19a33ceb811..e433cb0ff894 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -32,6 +32,7 @@
#include <linux/ip.h>
#include <net/protocol.h>
#include <net/tcp.h>
+#include <asm/unaligned.h>
#include <net/ip_vs.h>
@@ -44,16 +45,9 @@
* List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
* First port is set to the default port.
*/
-static int ports[IP_VS_APP_MAX_PORTS] = {21, 0};
-module_param_array(ports, int, NULL, 0);
-
-/*
- * Debug level
- */
-#ifdef CONFIG_IP_VS_DEBUG
-static int debug=0;
-module_param(debug, int, 0);
-#endif
+static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, ushort, NULL, 0);
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
/* Dummy variable */
@@ -81,7 +75,7 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
*/
static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
const char *pattern, size_t plen, char term,
- __u32 *addr, __u16 *port,
+ __be32 *addr, __be16 *port,
char **start, char **end)
{
unsigned char p[6];
@@ -121,8 +115,8 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
if (i != 5)
return -1;
- *addr = (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0];
- *port = (p[5]<<8) | p[4];
+ *addr = get_unaligned((__be32 *)p);
+ *port = get_unaligned((__be16 *)(p + 4));
return 1;
}
@@ -147,8 +141,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
- __u32 from;
- __u16 port;
+ __be32 from;
+ __be16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
@@ -177,7 +171,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
&start, &end) != 1)
return 1;
- IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
+ IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n",
NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
@@ -206,7 +200,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
from = n_cp->vaddr;
port = n_cp->vport;
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
- port&255, (port>>8)&255);
+ ntohs(port)&255, (ntohs(port)>>8)&255);
buf_len = strlen(buf);
/*
@@ -250,8 +244,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_start, *data_limit;
char *start, *end;
- __u32 to;
- __u16 port;
+ __be32 to;
+ __be16 port;
struct ip_vs_conn *n_cp;
/* no diff required for incoming packets */
@@ -280,7 +274,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
/* Passive mode on */
- IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n",
+ IP_VS_DBG(7, "got PASV at %zd of %zd\n",
data - data_start,
data_limit - data_start);
cp->app_data = &ip_vs_ftp_pasv;
@@ -302,7 +296,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
&start, &end) != 1)
return 1;
- IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
+ IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
NIPQUAD(to), ntohs(port));
/* Passive mode off */
@@ -311,7 +305,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Now update or create a connection entry for it
*/
- IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
ip_vs_proto_name(iph->protocol),
NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
@@ -375,8 +369,8 @@ static int __init ip_vs_ftp_init(void)
ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
if (ret)
break;
- IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n",
- app->name, i, ports[i]);
+ IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
+ app->name, i, ports[i]);
}
if (ret)
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 6e5cb92a5c83..524751e031de 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -87,7 +87,7 @@ static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
*/
struct ip_vs_lblc_entry {
struct list_head list;
- __u32 addr; /* destination IP address */
+ __be32 addr; /* destination IP address */
struct ip_vs_dest *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
};
@@ -160,7 +160,7 @@ static struct ctl_table_header * sysctl_header;
* IP address to a server.
*/
static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
+ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
@@ -195,7 +195,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
/*
* Returns hash value for IPVS LBLC entry
*/
-static inline unsigned ip_vs_lblc_hashkey(__u32 addr)
+static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
{
return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
}
@@ -234,7 +234,7 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
* Get ip_vs_lblc_entry associated with supplied parameters.
*/
static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr)
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
{
unsigned hash;
struct ip_vs_lblc_entry *en;
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 32ba37ba72d8..08990192b6ec 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -276,7 +276,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
*/
struct ip_vs_lblcr_entry {
struct list_head list;
- __u32 addr; /* destination IP address */
+ __be32 addr; /* destination IP address */
struct ip_vs_dest_set set; /* destination server set */
unsigned long lastuse; /* last used time */
};
@@ -348,7 +348,7 @@ static struct ctl_table_header * sysctl_header;
* new/free a ip_vs_lblcr_entry, which is a mapping of a destination
* IP address to a server.
*/
-static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr)
+static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr)
{
struct ip_vs_lblcr_entry *en;
@@ -381,7 +381,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
/*
* Returns hash value for IPVS LBLCR entry
*/
-static inline unsigned ip_vs_lblcr_hashkey(__u32 addr)
+static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
{
return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
}
@@ -420,7 +420,7 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
* Get ip_vs_lblcr_entry associated with supplied parameters.
*/
static inline struct ip_vs_lblcr_entry *
-ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr)
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
{
unsigned hash;
struct ip_vs_lblcr_entry *en;
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 867d4e9c6594..c4528b5c800d 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -176,7 +176,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
pp->name, NIPQUAD(ih->saddr),
NIPQUAD(ih->daddr));
else {
- __u16 _ports[2], *pptr
+ __be16 _ports[2], *pptr
;
pptr = skb_header_pointer(skb, offset + ih->ihl*4,
sizeof(_ports), _ports);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b1160a3a..bfe779e74590 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -29,7 +29,7 @@ static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
@@ -50,7 +50,7 @@ static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
@@ -112,12 +112,12 @@ tcp_conn_schedule(struct sk_buff *skb,
static inline void
-tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
- u16 oldport, u16 newport)
+tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
+ __be16 oldport, __be16 newport)
{
tcph->check =
ip_vs_check_diff(~oldip, newip,
- ip_vs_check_diff(oldport ^ 0xFFFF,
+ ip_vs_check_diff(oldport ^ htonl(0xFFFF),
newport, tcph->check));
}
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
skb->len - tcphoff,
skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
}
break;
default:
- /* CHECKSUM_UNNECESSARY */
+ /* No need to checksum. */
break;
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175d8f28..54aa7603591f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -29,7 +29,7 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
@@ -54,7 +54,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
- __u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
sizeof(_ports), _ports);
@@ -117,15 +117,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static inline void
-udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
- u16 oldport, u16 newport)
+udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
+ __be16 oldport, __be16 newport)
{
uhdr->check =
ip_vs_check_diff(~oldip, newip,
- ip_vs_check_diff(oldport ^ 0xFFFF,
+ ip_vs_check_diff(oldport ^ htonl(0xFFFF),
newport, uhdr->check));
if (!uhdr->check)
- uhdr->check = 0xFFFF;
+ uhdr->check = htonl(0xFFFF);
}
static int
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb,
cp->protocol,
(*pskb)->csum);
if (udph->check == 0)
- udph->check = 0xFFFF;
+ udph->check = htonl(0xFFFF);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, udph->check,
(char*)&(udph->check) - (char*)udph);
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(skb->nh.iph->saddr,
skb->nh.iph->daddr,
skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
}
break;
default:
- /* CHECKSUM_UNNECESSARY */
+ /* No need to checksum. */
break;
}
}
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 7775e6cc68be..338668f88fe2 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -63,7 +63,7 @@ struct ip_vs_sh_bucket {
/*
* Returns hash value for IPVS SH entry
*/
-static inline unsigned ip_vs_sh_hashkey(__u32 addr)
+static inline unsigned ip_vs_sh_hashkey(__be32 addr)
{
return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
}
@@ -73,7 +73,7 @@ static inline unsigned ip_vs_sh_hashkey(__u32 addr)
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr)
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
{
return (tbl[ip_vs_sh_hashkey(addr)]).dest;
}
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 1bca714bda3d..6ab57d72b615 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -48,16 +48,16 @@ struct ip_vs_sync_conn {
/* Protocol, addresses and port numbers */
__u8 protocol; /* Which protocol (TCP/UDP) */
- __u16 cport;
- __u16 vport;
- __u16 dport;
- __u32 caddr; /* client address */
- __u32 vaddr; /* virtual address */
- __u32 daddr; /* destination address */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
/* Flags and state transition */
- __u16 flags; /* status flags */
- __u16 state; /* state info */
+ __be16 flags; /* status flags */
+ __be16 state; /* state info */
/* The sequence options start here */
};
@@ -464,7 +464,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
struct net_device *dev;
- u32 addr;
+ __be32 addr;
struct sockaddr_in sin;
if ((dev = __dev_get_by_name(ifname)) == NULL)
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 52c12e9edbbc..e1f77bd7c9a5 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -232,7 +232,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
- __u16 _pt, *p;
+ __be16 _pt, *p;
p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index db67373f9b34..252e837b17a5 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -12,7 +12,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
index 5249dbe7c559..b8c289f247cb 100644
--- a/net/ipv4/multipath_random.c
+++ b/net/ipv4/multipath_random.c
@@ -12,7 +12,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index b6cd2870478f..bba5abe5542d 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -12,7 +12,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index 342d0b9098f5..92b04823e034 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -12,7 +12,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
@@ -61,8 +60,8 @@ struct multipath_dest {
struct list_head list;
const struct fib_nh *nh_info;
- __u32 netmask;
- __u32 network;
+ __be32 netmask;
+ __be32 network;
unsigned char prefixlen;
struct rcu_head rcu;
@@ -77,7 +76,7 @@ struct multipath_route {
struct list_head list;
int oif;
- __u32 gw;
+ __be32 gw;
struct list_head dests;
struct rcu_head rcu;
@@ -129,8 +128,8 @@ static unsigned char __multipath_lookup_weight(const struct flowi *fl,
/* find state entry for destination */
list_for_each_entry_rcu(d, &target_route->dests, list) {
- __u32 targetnetwork = fl->fl4_dst &
- (0xFFFFFFFF >> (32 - d->prefixlen));
+ __be32 targetnetwork = fl->fl4_dst &
+ inet_make_mask(d->prefixlen);
if ((targetnetwork & d->netmask) == d->network) {
weight = d->nh_info->nh_weight;
@@ -218,8 +217,8 @@ static void wrandom_select_route(const struct flowi *flp,
*rp = decision;
}
-static void wrandom_set_nhinfo(__u32 network,
- __u32 netmask,
+static void wrandom_set_nhinfo(__be32 network,
+ __be32 netmask,
unsigned char prefixlen,
const struct fib_nh *nh)
{
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b794bc..5ac15379a0cf 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -128,8 +128,8 @@ EXPORT_SYMBOL(ip_nat_decode_session);
*/
struct ip_rt_info {
- u_int32_t daddr;
- u_int32_t saddr;
+ __be32 daddr;
+ __be32 saddr;
u_int8_t tos;
};
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int csum = 0;
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
break;
if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e1d7f5fbc526..a55b8ff70ded 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_DSCP
- tristate "DSCP match support"
- depends on IP_NF_IPTABLES
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_AH
tristate "AH match support"
depends on IP_NF_IPTABLES
@@ -332,7 +321,7 @@ config IP_NF_MATCH_HASHLIMIT
help
This option adds a new iptables `hashlimit' match.
- As opposed to `limit', this match dynamically crates a hash table
+ As opposed to `limit', this match dynamically creates a hash table
of limit buckets, based on your selection of source/destination
ip addresses and/or ports.
@@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_DSCP
- tristate "DSCP target support"
- depends on IP_NF_MANGLE
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_TTL
tristate 'TTL target support'
depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3af59c..09aaed1a8063 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
@@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d0d19192026d..17e1a687ab45 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -9,7 +9,6 @@
*
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -57,8 +56,6 @@ do { \
#define ARP_NF_ASSERT(x)
#endif
-#include <linux/netfilter_ipv4/listhelp.h>
-
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
char *hdr_addr, int len)
{
@@ -83,7 +80,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
{
char *arpptr = (char *)(arphdr + 1);
char *src_devaddr, *tgt_devaddr;
- u32 src_ipaddr, tgt_ipaddr;
+ __be32 src_ipaddr, tgt_ipaddr;
int i, ret;
#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
@@ -209,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
if (net_ratelimit())
printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -227,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct arpt_table *table,
- void *userdata)
+ struct arpt_table *table)
{
static const char nulldevname[IFNAMSIZ];
unsigned int verdict = NF_DROP;
@@ -237,7 +232,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
- struct xt_table_info *private = table->private;
+ struct xt_table_info *private;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -249,6 +244,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
outdev = out ? out->name : nulldevname;
read_lock_bh(&table->lock);
+ private = table->private;
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
in, out,
hook,
t->u.kernel.target,
- t->data,
- userdata);
+ t->data);
/* Target might have changed stuff. */
arp = (*pskb)->nh.arph;
@@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
if (t->u.kernel.target == &arpt_standard_target) {
if (!standard_check(t, size)) {
ret = -EINVAL;
- goto out;
+ goto err;
}
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
- t->u.target_size
- - sizeof(*t),
e->comefrom)) {
duprintf("arp_tables: check failed for `%s'.\n",
t->u.kernel.target->name);
@@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
t = arpt_get_target(e);
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
return 0;
}
@@ -1120,7 +1112,8 @@ int arpt_register_table(struct arpt_table *table,
return ret;
}
- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+ ret = xt_register_table(table, &bootstrap, newinfo);
+ if (ret != 0) {
xt_free_table_info(newinfo);
return ret;
}
@@ -1170,21 +1163,34 @@ static int __init arp_tables_init(void)
{
int ret;
- xt_proto_init(NF_ARP);
+ ret = xt_proto_init(NF_ARP);
+ if (ret < 0)
+ goto err1;
/* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(&arpt_standard_target);
- xt_register_target(&arpt_error_target);
+ ret = xt_register_target(&arpt_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&arpt_error_target);
+ if (ret < 0)
+ goto err3;
/* Register setsockopt */
ret = nf_register_sockopt(&arpt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err4;
printk("arp_tables: (C) 2002 David S. Miller\n");
return 0;
+
+err4:
+ xt_unregister_target(&arpt_error_target);
+err3:
+ xt_unregister_target(&arpt_standard_target);
+err2:
+ xt_proto_fini(NF_ARP);
+err1:
+ return ret;
}
static void __exit arp_tables_fini(void)
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c1ceb9..d12b1df252a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@ static unsigned int
target(struct sk_buff **pskb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
const struct arpt_mangle *mangle = targinfo;
struct arphdr *arp;
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename, const void *e, const struct xt_target *target,
- void *targinfo, unsigned int targinfosize, unsigned int hook_mask)
+ void *targinfo, unsigned int hook_mask)
{
const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472faa53b..7edea2a1696c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return arpt_do_table(pskb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 0a7bd7f04061..6c7383a8e42b 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -155,11 +155,11 @@ static int help(struct sk_buff **pskb,
exp->tuple.dst.protonum = IPPROTO_TCP;
exp->tuple.dst.u.tcp.port = htons(port);
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
exp->mask.dst.protonum = 0xFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
if (ip_nat_amanda_hook)
ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7e4cf9a4d15f..143c4668538b 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -17,7 +17,6 @@
* - export ip_conntrack[_expect]_{find_get,put} functions
* */
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
@@ -48,7 +47,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#define IP_CONNTRACK_VERSION "2.4"
@@ -65,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
+unsigned int ip_conntrack_htable_size __read_mostly = 0;
+int ip_conntrack_max __read_mostly;
+struct list_head *ip_conntrack_hash __read_mostly;
static kmem_cache_t *ip_conntrack_cachep __read_mostly;
static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
+unsigned int ip_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
+static int ip_conntrack_vmalloc __read_mostly;
static unsigned int ip_conntrack_next_id;
static unsigned int ip_conntrack_expect_next_id;
@@ -151,8 +149,8 @@ static unsigned int ip_conntrack_hash_rnd;
static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
unsigned int size, unsigned int rnd)
{
- return (jhash_3words(tuple->src.ip,
- (tuple->dst.ip ^ tuple->dst.protonum),
+ return (jhash_3words((__force u32)tuple->src.ip,
+ ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
(tuple->src.u.all | (tuple->dst.u.all << 16)),
rnd) % size);
}
@@ -295,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
static void
clean_from_lists(struct ip_conntrack *ct)
{
- unsigned int ho, hr;
-
DEBUGP("clean_from_lists(%p)\n", ct);
ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
- ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
/* Destroy all pending expectations */
ip_ct_remove_expectations(ct);
@@ -314,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
+ struct ip_conntrack_helper *helper;
DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -322,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
ip_conntrack_event(IPCT_DESTROY, ct);
set_bit(IPS_DYING_BIT, &ct->status);
+ helper = ct->helper;
+ if (helper && helper->destroy)
+ helper->destroy(ct);
+
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
@@ -368,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
ip_conntrack_put(ct);
}
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- ASSERT_READ_LOCK(&ip_conntrack_lock);
- return tuplehash_to_ctrack(i) != ignored_conntrack
- && ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack)
@@ -387,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
ASSERT_READ_LOCK(&ip_conntrack_lock);
list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
- if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ if (tuplehash_to_ctrack(h) != ignored_conntrack &&
+ ip_ct_tuple_equal(tuple, &h->tuple)) {
CONNTRACK_STAT_INC(found);
return h;
}
@@ -418,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
unsigned int repl_hash)
{
ct->id = ++ip_conntrack_next_id;
- list_prepend(&ip_conntrack_hash[hash],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_prepend(&ip_conntrack_hash[repl_hash],
- &ct->tuplehash[IP_CT_DIR_REPLY].list);
+ list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+ &ip_conntrack_hash[hash]);
+ list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+ &ip_conntrack_hash[repl_hash]);
}
void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -441,6 +430,7 @@ int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
unsigned int hash, repl_hash;
+ struct ip_conntrack_tuple_hash *h;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -471,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- if (!LIST_FIND(&ip_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_for_each_entry(h, &ip_conntrack_hash[hash], list)
+ if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &h->tuple))
+ goto out;
+ list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
+ if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &h->tuple))
+ goto out;
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- CONNTRACK_STAT_INC(insert);
- write_unlock_bh(&ip_conntrack_lock);
- if (ct->helper)
- ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+ /* Remove from unconfirmed list */
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+ __ip_conntrack_hash_insert(ct, hash, repl_hash);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ CONNTRACK_STAT_INC(insert);
+ write_unlock_bh(&ip_conntrack_lock);
+ if (ct->helper)
+ ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
- ip_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
+ ip_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, *pskb);
- return NF_ACCEPT;
- }
+ return NF_ACCEPT;
+out:
CONNTRACK_STAT_INC(insert_failed);
write_unlock_bh(&ip_conntrack_lock);
-
return NF_DROP;
}
@@ -528,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
- return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
static int early_drop(struct list_head *chain)
{
/* Traverse backwards: gives us oldest, which is roughly LRU */
struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct = NULL;
+ struct ip_conntrack *ct = NULL, *tmp;
int dropped = 0;
read_lock_bh(&ip_conntrack_lock);
- h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
- if (h) {
- ct = tuplehash_to_ctrack(h);
- atomic_inc(&ct->ct_general.use);
+ list_for_each_entry_reverse(h, chain, list) {
+ tmp = tuplehash_to_ctrack(h);
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+ ct = tmp;
+ atomic_inc(&ct->ct_general.use);
+ break;
+ }
}
read_unlock_bh(&ip_conntrack_lock);
@@ -560,18 +548,16 @@ static int early_drop(struct list_head *chain)
return dropped;
}
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
- const struct ip_conntrack_tuple *rtuple)
-{
- return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
static struct ip_conntrack_helper *
__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
{
- return LIST_FIND(&helpers, helper_cmp,
- struct ip_conntrack_helper *,
- tuple);
+ struct ip_conntrack_helper *h;
+
+ list_for_each_entry(h, &helpers, list) {
+ if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+ return h;
+ }
+ return NULL;
}
struct ip_conntrack_helper *
@@ -641,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
ip_conntrack_hash_rnd_initted = 1;
}
+ /* We don't want any race condition at early drop stage */
+ atomic_inc(&ip_conntrack_count);
+
if (ip_conntrack_max
- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+ && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
unsigned int hash = hash_conntrack(orig);
/* Try dropping from this hash chain. */
if (!early_drop(&ip_conntrack_hash[hash])) {
+ atomic_dec(&ip_conntrack_count);
if (net_ratelimit())
printk(KERN_WARNING
"ip_conntrack: table full, dropping"
@@ -657,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
+ atomic_dec(&ip_conntrack_count);
return ERR_PTR(-ENOMEM);
}
@@ -670,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
- atomic_inc(&ip_conntrack_count);
-
return conntrack;
}
@@ -1063,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
BUG_ON(me->timeout == 0);
write_lock_bh(&ip_conntrack_lock);
- list_prepend(&helpers, me);
+ list_add(&me->list, &helpers);
write_unlock_bh(&ip_conntrack_lock);
return 0;
@@ -1082,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name)
return NULL;
}
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_helper *me)
+static inline void unhelp(struct ip_conntrack_tuple_hash *i,
+ const struct ip_conntrack_helper *me)
{
if (tuplehash_to_ctrack(i)->helper == me) {
ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
tuplehash_to_ctrack(i)->helper = NULL;
}
- return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
unsigned int i;
+ struct ip_conntrack_tuple_hash *h;
struct ip_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
write_lock_bh(&ip_conntrack_lock);
- LIST_DELETE(&helpers, me);
+ list_del(&me->list);
/* Get rid of expectations */
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1109,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
}
}
/* Get rid of expecteds, set helpers to NULL. */
- LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
- struct ip_conntrack_tuple_hash *, me);
+ list_for_each_entry(h, &unconfirmed, list)
+ unhelp(h, me);
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+ list_for_each_entry(h, &ip_conntrack_hash[i], list)
+ unhelp(h, me);
+ }
write_unlock_bh(&ip_conntrack_lock);
/* Someone could be still looking at the helper in a bh. */
@@ -1178,9 +1169,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct,
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
const struct ip_conntrack_tuple *tuple)
{
- NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+ NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
&tuple->src.u.tcp.port);
- NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+ NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
&tuple->dst.u.tcp.port);
return 0;
@@ -1195,9 +1186,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
return -EINVAL;
t->src.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
t->dst.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
return 0;
}
@@ -1238,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
nf_conntrack_get(nskb->nfct);
}
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
- int (*iter)(struct ip_conntrack *i, void *data),
- void *data)
-{
- return iter(tuplehash_to_ctrack(i), data);
-}
-
/* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
+static struct ip_conntrack *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
void *data, unsigned int *bucket)
{
- struct ip_conntrack_tuple_hash *h = NULL;
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
write_lock_bh(&ip_conntrack_lock);
for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- break;
+ list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
+ ct = tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ list_for_each_entry(h, &unconfirmed, list) {
+ ct = tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
- if (!h)
- h = LIST_FIND_W(&unconfirmed, do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
write_unlock_bh(&ip_conntrack_lock);
+ return NULL;
- return h;
+found:
+ atomic_inc(&ct->ct_general.use);
+ write_unlock_bh(&ip_conntrack_lock);
+ return ct;
}
void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
- struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
unsigned int bucket = 0;
- while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
death_by_timeout((unsigned long)ct);
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 4dcf526c3944..93dcf960662f 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -8,7 +8,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -426,8 +425,8 @@ static int help(struct sk_buff **pskb,
exp->tuple.src.u.tcp.port = 0; /* Don't care. */
exp->tuple.dst.protonum = IPPROTO_TCP;
exp->mask = ((struct ip_conntrack_tuple)
- { { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+ { { htonl(0xFFFFFFFF), { 0 } },
+ { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
exp->expectfn = NULL;
exp->flags = 0;
@@ -489,7 +488,7 @@ static int __init ip_conntrack_ftp_init(void)
for (i = 0; i < ports_c; i++) {
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].mask.src.u.tcp.port = 0xFFFF;
+ ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
ftp[i].mask.dst.protonum = 0xFF;
ftp[i].max_expected = 1;
ftp[i].timeout = 5 * 60; /* 5 minutes */
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 0665674218c6..7b7441202bfd 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -11,7 +11,6 @@
* For more information, please see http://nath323.sourceforge.net/
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -50,11 +49,11 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
int (*set_h245_addr_hook) (struct sk_buff ** pskb,
unsigned char **data, int dataoff,
H245_TransportAddress * addr,
- u_int32_t ip, u_int16_t port);
+ __be32 ip, u_int16_t port);
int (*set_h225_addr_hook) (struct sk_buff ** pskb,
unsigned char **data, int dataoff,
TransportAddress * addr,
- u_int32_t ip, u_int16_t port);
+ __be32 ip, u_int16_t port);
int (*set_sig_addr_hook) (struct sk_buff ** pskb,
struct ip_conntrack * ct,
enum ip_conntrack_info ctinfo,
@@ -210,7 +209,7 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
/****************************************************************************/
static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
- u_int32_t * ip, u_int16_t * port)
+ __be32 * ip, u_int16_t * port)
{
unsigned char *p;
@@ -233,7 +232,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
u_int16_t rtp_port;
struct ip_conntrack_expect *rtp_exp;
@@ -255,10 +254,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtp_exp->mask.src.ip = 0xFFFFFFFF;
+ rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
rtp_exp->mask.src.u.udp.port = 0;
- rtp_exp->mask.dst.ip = 0xFFFFFFFF;
- rtp_exp->mask.dst.u.udp.port = 0xFFFF;
+ rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
rtp_exp->mask.dst.protonum = 0xFF;
rtp_exp->flags = 0;
@@ -272,10 +271,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtcp_exp->mask.src.ip = 0xFFFFFFFF;
+ rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
rtcp_exp->mask.src.u.udp.port = 0;
- rtcp_exp->mask.dst.ip = 0xFFFFFFFF;
- rtcp_exp->mask.dst.u.udp.port = 0xFFFF;
+ rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
rtcp_exp->mask.dst.protonum = 0xFF;
rtcp_exp->flags = 0;
@@ -326,7 +325,7 @@ static int expect_t120(struct sk_buff **pskb,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
@@ -343,10 +342,10 @@ static int expect_t120(struct sk_buff **pskb,
exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */
@@ -627,7 +626,7 @@ void ip_conntrack_h245_expect(struct ip_conntrack *new,
/****************************************************************************/
int get_h225_addr(unsigned char *data, TransportAddress * addr,
- u_int32_t * ip, u_int16_t * port)
+ __be32 * ip, u_int16_t * port)
{
unsigned char *p;
@@ -649,7 +648,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
@@ -666,10 +665,10 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = 0;
@@ -710,7 +709,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
@@ -752,10 +751,10 @@ static int expect_callforwarding(struct sk_buff **pskb,
exp->tuple.dst.ip = ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = 0;
@@ -792,7 +791,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
int ret;
int i;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
DEBUGP("ip_ct_q931: Setup\n");
@@ -1189,7 +1188,7 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
/****************************************************************************/
static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
- u_int32_t ip, u_int16_t port)
+ __be32 ip, u_int16_t port)
{
struct ip_conntrack_expect *exp;
struct ip_conntrack_tuple tuple;
@@ -1201,7 +1200,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
tuple.dst.protonum = IPPROTO_TCP;
exp = __ip_conntrack_expect_find(&tuple);
- if (exp->master == ct)
+ if (exp && exp->master == ct)
return exp;
return NULL;
}
@@ -1229,7 +1228,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
int i;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp;
@@ -1252,10 +1251,10 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = gkrouted_only ? 0xFFFFFFFF : 0;
+ exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */
@@ -1308,7 +1307,7 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp;
@@ -1334,10 +1333,10 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->tuple.dst.ip = ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_UDP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = 0;
exp->expectfn = ip_conntrack_ras_expect;
@@ -1478,7 +1477,7 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
{
struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
int dir = CTINFO2DIR(ctinfo);
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
DEBUGP("ip_ct_ras: ARQ\n");
@@ -1514,7 +1513,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp;
@@ -1539,10 +1538,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->tuple.dst.ip = ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = IP_CT_EXPECT_PERMANENT;
exp->expectfn = ip_conntrack_q931_expect;
@@ -1582,7 +1581,7 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
@@ -1599,10 +1598,10 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->tuple.dst.ip = ip;
exp->tuple.dst.u.tcp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.tcp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->flags = IP_CT_EXPECT_PERMANENT;
exp->expectfn = ip_conntrack_q931_expect;
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 8ccfe17bb253..a2af5e0c7f99 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
* - We can only support one single call within each session
*
* TODO:
- * - testing of incoming PPTP calls
+ * - testing of incoming PPTP calls
*
- * Changes:
+ * Changes:
* 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
+ * - Call ip_conntrack_unexpect_related() from
* pptp_destroy_siblings() to destroy expectations in case
* CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
* (Philip Craig <philipc@snapgear.com>)
@@ -46,7 +46,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -81,7 +80,7 @@ int
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq);
-int
+void
(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
struct ip_conntrack_expect *expect_reply);
@@ -142,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
invert_tuplepr(&inv_t, &exp->tuple);
DEBUGP("trying to unexpect other dir: ");
DUMP_TUPLE(&inv_t);
-
+
exp_other = ip_conntrack_expect_find(&inv_t);
if (exp_other) {
/* delete other expectation. */
@@ -195,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
{
struct ip_conntrack_tuple t;
- /* Since ct->sibling_list has literally rusted away in 2.6.11,
+ ip_ct_gre_keymap_destroy(ct);
+ /* Since ct->sibling_list has literally rusted away in 2.6.11,
* we now need another way to find out about our sibling
* contrack and expects... -HW */
/* try original (pns->pac) tuple */
memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+ t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
+ t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
if (!destroy_sibling_or_exp(&t))
DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -211,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
/* try reply (pac->pns) tuple */
memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+ t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
+ t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
if (!destroy_sibling_or_exp(&t))
DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -220,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
static inline int
-exp_gre(struct ip_conntrack *master,
- u_int32_t seq,
+exp_gre(struct ip_conntrack *ct,
__be16 callid,
__be16 peer_callid)
{
- struct ip_conntrack_tuple inv_tuple;
- struct ip_conntrack_tuple exp_tuples[] = {
- /* tuple in original direction, PNS->PAC */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
- .u = { .gre = { .key = peer_callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
- .u = { .gre = { .key = callid } },
- .protonum = IPPROTO_GRE
- },
- },
- /* tuple in reply direction, PAC->PNS */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
- .u = { .gre = { .key = callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
- .u = { .gre = { .key = peer_callid } },
- .protonum = IPPROTO_GRE
- },
- }
- };
struct ip_conntrack_expect *exp_orig, *exp_reply;
int ret = 1;
- exp_orig = ip_conntrack_expect_alloc(master);
+ exp_orig = ip_conntrack_expect_alloc(ct);
if (exp_orig == NULL)
goto out;
- exp_reply = ip_conntrack_expect_alloc(master);
+ exp_reply = ip_conntrack_expect_alloc(ct);
if (exp_reply == NULL)
goto out_put_orig;
- memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+ /* original direction, PNS->PAC */
+ exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ exp_orig->tuple.src.u.gre.key = peer_callid;
+ exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ exp_orig->tuple.dst.u.gre.key = callid;
+ exp_orig->tuple.dst.protonum = IPPROTO_GRE;
- exp_orig->mask.src.ip = 0xffffffff;
+ exp_orig->mask.src.ip = htonl(0xffffffff);
exp_orig->mask.src.u.all = 0;
- exp_orig->mask.dst.u.all = 0;
exp_orig->mask.dst.u.gre.key = htons(0xffff);
- exp_orig->mask.dst.ip = 0xffffffff;
+ exp_orig->mask.dst.ip = htonl(0xffffffff);
exp_orig->mask.dst.protonum = 0xff;
-
- exp_orig->master = master;
+
+ exp_orig->master = ct;
exp_orig->expectfn = pptp_expectfn;
exp_orig->flags = 0;
/* both expectations are identical apart from tuple */
memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
- memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
- if (ip_nat_pptp_hook_exp_gre)
- ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
- else {
-
- DEBUGP("calling expect_related PNS->PAC");
- DUMP_TUPLE(&exp_orig->tuple);
-
- if (ip_conntrack_expect_related(exp_orig) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_put_both;
- }
+ /* reply direction, PAC->PNS */
+ exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ exp_reply->tuple.src.u.gre.key = callid;
+ exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ exp_reply->tuple.dst.u.gre.key = peer_callid;
+ exp_reply->tuple.dst.protonum = IPPROTO_GRE;
- DEBUGP("calling expect_related PAC->PNS");
- DUMP_TUPLE(&exp_reply->tuple);
-
- if (ip_conntrack_expect_related(exp_reply) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_unexpect_orig;
- }
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
- DEBUGP("cannot keymap_add() exp\n");
- goto out_unexpect_both;
- }
-
- invert_tuplepr(&inv_tuple, &exp_reply->tuple);
- if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(master);
- DEBUGP("cannot keymap_add() exp_inv\n");
- goto out_unexpect_both;
- }
- ret = 0;
+ if (ip_nat_pptp_hook_exp_gre)
+ ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+ if (ip_conntrack_expect_related(exp_orig) != 0)
+ goto out_put_both;
+ if (ip_conntrack_expect_related(exp_reply) != 0)
+ goto out_unexpect_orig;
+
+ /* Add GRE keymap entries */
+ if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
+ goto out_unexpect_both;
+ if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
+ ip_ct_gre_keymap_destroy(ct);
+ goto out_unexpect_both;
}
+ ret = 0;
out_put_both:
ip_conntrack_expect_put(exp_reply);
@@ -323,73 +292,36 @@ out_unexpect_orig:
goto out_put_both;
}
-static inline int
+static inline int
pptp_inbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
- __be16 *cid, *pcid;
- u_int32_t seq;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
+ __be16 cid = 0, pcid = 0;
msg = ntohs(ctlh->messageType);
DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
switch (msg) {
case PPTP_START_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.srep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED) {
- DEBUGP("%s without START_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate < PPTP_SESSION_REQUESTED)
+ goto invalid;
if (pptpReq->srep.resultCode == PPTP_START_OK)
info->sstate = PPTP_SESSION_CONFIRMED;
- else
+ else
info->sstate = PPTP_SESSION_ERROR;
break;
case PPTP_STOP_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.strep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ) {
- DEBUGP("%s without STOP_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate > PPTP_SESSION_STOPREQ)
+ goto invalid;
if (pptpReq->strep.resultCode == PPTP_STOP_OK)
info->sstate = PPTP_SESSION_NONE;
else
@@ -397,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
case PPTP_OUT_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.ocack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF) {
- DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+ info->cstate != PPTP_CALL_OUT_CONF)
+ goto invalid;
+
+ cid = pptpReq->ocack.callID;
+ pcid = pptpReq->ocack.peersCallID;
+ if (info->pns_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+ info->cstate = PPTP_CALL_OUT_CONF;
+ info->pac_call_id = cid;
+ exp_gre(ct, cid, pcid);
+ } else
info->cstate = PPTP_CALL_NONE;
- break;
- }
-
- cid = &pptpReq->ocack.callID;
- pcid = &pptpReq->ocack.peersCallID;
-
- info->pac_call_id = ntohs(*cid);
-
- if (htons(info->pns_call_id) != *pcid) {
- DEBUGP("%s for unknown callid %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
-
- DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
- ntohs(*cid), ntohs(*pcid));
-
- info->cstate = PPTP_CALL_OUT_CONF;
-
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
break;
case PPTP_IN_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server tells us about incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+
+ cid = pptpReq->icreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = ntohs(*pcid);
+ info->pac_call_id = cid;
break;
case PPTP_IN_CALL_CONNECT:
- if (reqlen < sizeof(_pptpReq.iccon)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server tells us about incoming call established */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- if (info->cstate != PPTP_CALL_IN_REP
- && info->cstate != PPTP_CALL_IN_CONF) {
- DEBUGP("%s but never sent IN_CALL_REPLY\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_IN_REP &&
+ info->cstate != PPTP_CALL_IN_CONF)
+ goto invalid;
- pcid = &pptpReq->iccon.peersCallID;
- cid = &info->pac_call_id;
+ pcid = pptpReq->iccon.peersCallID;
+ cid = info->pac_call_id;
- if (info->pns_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown CallID %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
+ if (info->pns_call_id != pcid)
+ goto invalid;
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
info->cstate = PPTP_CALL_IN_CONF;
/* we expect a GRE connection from PAC to PNS */
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
-
+ exp_gre(ct, cid, pcid);
break;
case PPTP_CALL_DISCONNECT_NOTIFY:
- if (reqlen < sizeof(_pptpReq.disc)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms disconnect */
- cid = &pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+ cid = pptpReq->disc.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
info->cstate = PPTP_CALL_NONE;
/* untrack this call id, unexpect GRE packets */
@@ -514,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
case PPTP_WAN_ERROR_NOTIFY:
- break;
-
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* I don't have to explain these ;) */
break;
default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
- ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
- break;
+ goto invalid;
}
-
if (ip_nat_pptp_hook_inbound)
return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
pptpReq);
-
return NF_ACCEPT;
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
}
static inline int
pptp_outbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
- __be16 *cid, *pcid;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
+ __be16 cid = 0, pcid = 0;
msg = ntohs(ctlh->messageType);
DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -569,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
switch (msg) {
case PPTP_START_SESSION_REQUEST:
/* client requests for new control session */
- if (info->sstate != PPTP_SESSION_NONE) {
- DEBUGP("%s but we already have one",
- pptp_msg_name[msg]);
- }
+ if (info->sstate != PPTP_SESSION_NONE)
+ goto invalid;
info->sstate = PPTP_SESSION_REQUESTED;
break;
case PPTP_STOP_SESSION_REQUEST:
@@ -581,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb,
break;
case PPTP_OUT_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.ocreq)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- /* FIXME: break; */
- }
-
/* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
info->cstate = PPTP_CALL_OUT_REQ;
/* track PNS call id */
- cid = &pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
- info->pns_call_id = ntohs(*cid);
+ cid = pptpReq->ocreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ info->pns_call_id = cid;
break;
case PPTP_IN_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ
- && info->cstate != PPTP_CALL_IN_REP) {
- DEBUGP("%s without incall_req\n",
- pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+ if (info->cstate != PPTP_CALL_IN_REQ &&
+ info->cstate != PPTP_CALL_IN_REP)
+ goto invalid;
+
+ cid = pptpReq->icack.callID;
+ pcid = pptpReq->icack.peersCallID;
+ if (info->pac_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+ /* part two of the three-way handshake */
+ info->cstate = PPTP_CALL_IN_REP;
+ info->pns_call_id = cid;
+ } else
info->cstate = PPTP_CALL_NONE;
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- if (info->pac_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown call %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = ntohs(pptpReq->icack.callID);
break;
case PPTP_CALL_CLEAR_REQUEST:
/* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("CLEAR_CALL but no session\n");
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
/* FUTURE: iterate over all calls and check if
* call ID is valid. We don't do this without newnat,
* because we only know about last call */
info->cstate = PPTP_CALL_CLEAR_REQ;
break;
case PPTP_SET_LINK_INFO:
- break;
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* I don't have to explain these ;) */
break;
default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0], msg);
- /* unknown: no need to create GRE masq table entry */
- break;
+ goto invalid;
}
-
+
if (ip_nat_pptp_hook_outbound)
return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
pptpReq);
+ return NF_ACCEPT;
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
}
+static const unsigned int pptp_msg_size[] = {
+ [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
+ [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
+ [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
+ [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
+ [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
+ [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
+ [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
+ [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
+ [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
+ [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
+ [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+ [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
+ [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
+};
/* track caller id inside control connection, call expect_related */
-static int
+static int
conntrack_pptp_help(struct sk_buff **pskb,
struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
- struct pptp_pkt_hdr _pptph, *pptph;
- struct tcphdr _tcph, *tcph;
- u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
- u_int32_t datalen;
int dir = CTINFO2DIR(ctinfo);
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- unsigned int nexthdr_off;
-
+ struct tcphdr _tcph, *tcph;
+ struct pptp_pkt_hdr _pptph, *pptph;
+ struct PptpControlHeader _ctlh, *ctlh;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+ unsigned int datalen, reqlen, nexthdr_off;
int oldsstate, oldcstate;
int ret;
+ u_int16_t msg;
/* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
+ if (ctinfo != IP_CT_ESTABLISHED
&& ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
DEBUGP("ctinfo = %u, skipping\n", ctinfo);
return NF_ACCEPT;
}
-
+
nexthdr_off = (*pskb)->nh.iph->ihl*4;
tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
BUG_ON(!tcph);
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
- if (tcph->fin || tcph->rst) {
- DEBUGP("RST/FIN received, timeouting GRE\n");
- /* can't do this after real newnat */
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- }
-
pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
if (!pptph) {
DEBUGP("no full PPTP header, can't track\n");
@@ -713,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb,
return NF_ACCEPT;
}
+ ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ msg = ntohs(ctlh->messageType);
+ if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ return NF_ACCEPT;
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+
+ pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
oldsstate = info->sstate;
oldcstate = info->cstate;
@@ -722,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb,
* established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
ctinfo);
else
/* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
ctinfo);
DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
oldsstate, info->sstate, oldcstate, info->cstate);
@@ -736,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb,
}
/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
+static struct ip_conntrack_helper pptp = {
.list = { NULL, NULL },
- .name = "pptp",
+ .name = "pptp",
.me = THIS_MODULE,
.max_expected = 2,
.timeout = 5 * 60,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
+ .tuple = { .src = { .ip = 0,
+ .u = { .tcp = { .port =
+ __constant_htons(PPTP_CONTROL_PORT) } }
+ },
+ .dst = { .ip = 0,
.u = { .all = 0 },
.protonum = IPPROTO_TCP
- }
+ }
},
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = __constant_htons(0xffff) } }
- },
- .dst = { .ip = 0,
+ .mask = { .src = { .ip = 0,
+ .u = { .tcp = { .port = __constant_htons(0xffff) } }
+ },
+ .dst = { .ip = 0,
.u = { .all = 0 },
- .protonum = 0xff
- }
+ .protonum = 0xff
+ }
},
- .help = conntrack_pptp_help
+ .help = conntrack_pptp_help,
+ .destroy = pptp_destroy_siblings,
};
extern void ip_ct_proto_gre_fini(void);
@@ -769,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void);
static int __init ip_conntrack_helper_pptp_init(void)
{
int retcode;
-
+
retcode = ip_ct_proto_gre_init();
if (retcode < 0)
return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index a2ac5ce544b2..75f7c3db1619 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -22,7 +22,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -219,7 +218,8 @@ static int help(struct sk_buff **pskb,
IPPROTO_TCP }});
exp->mask = ((struct ip_conntrack_tuple)
{ { 0, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+ { htonl(0xFFFFFFFF),
+ { .tcp = { htons(0xFFFF) } }, 0xFF }});
exp->expectfn = NULL;
exp->flags = 0;
if (ip_nat_irc_hook)
@@ -267,7 +267,7 @@ static int __init ip_conntrack_irc_init(void)
hlpr = &irc_helpers[i];
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->mask.src.u.tcp.port = 0xFFFF;
+ hlpr->mask.src.u.tcp.port = htons(0xFFFF);
hlpr->mask.dst.protonum = 0xFF;
hlpr->max_expected = max_dcc_channels;
hlpr->timeout = dcc_timeout;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81325b2..a1d6a89f64aa 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
+#include <linux/if_addr.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <net/route.h>
@@ -47,7 +48,7 @@ static int help(struct sk_buff **pskb,
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct in_device *in_dev;
- u_int32_t mask = 0;
+ __be32 mask = 0;
/* we're only interested in locally generated packets */
if ((*pskb)->sk == NULL)
@@ -77,12 +78,12 @@ static int help(struct sk_buff **pskb,
goto out;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = ntohs(NMBD_PORT);
+ exp->tuple.src.u.udp.port = htons(NMBD_PORT);
exp->mask.src.ip = mask;
- exp->mask.src.u.udp.port = 0xFFFF;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.udp.port = 0xFFFF;
+ exp->mask.src.u.udp.port = htons(0xFFFF);
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.udp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->expectfn = NULL;
@@ -114,7 +115,7 @@ static struct ip_conntrack_helper helper = {
.src = {
.u = {
.udp = {
- .port = 0xFFFF,
+ .port = __constant_htons(0xFFFF),
}
}
},
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 33891bb1fde4..53b6dffea6c2 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -78,8 +78,8 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb,
{
struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
- NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
- NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip);
+ NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
+ NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
NFA_NEST_END(skb, nest_parms);
@@ -110,7 +110,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
static inline int
ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
{
- u_int32_t status = htonl((u_int32_t) ct->status);
+ __be32 status = htonl((u_int32_t) ct->status);
NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
return 0;
@@ -122,7 +122,7 @@ static inline int
ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
{
long timeout_l = ct->timeout.expires - jiffies;
- u_int32_t timeout;
+ __be32 timeout;
if (timeout_l < 0)
timeout = 0;
@@ -192,13 +192,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nfattr *nest_count = NFA_NEST(skb, type);
- u_int32_t tmp;
+ __be32 tmp;
tmp = htonl(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
+ NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
tmp = htonl(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+ NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
NFA_NEST_END(skb, nest_count);
@@ -215,9 +215,9 @@ nfattr_failure:
static inline int
ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
{
- u_int32_t mark = htonl(ct->mark);
+ __be32 mark = htonl(ct->mark);
- NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+ NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
return 0;
nfattr_failure:
@@ -230,8 +230,8 @@ nfattr_failure:
static inline int
ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
{
- u_int32_t id = htonl(ct->id);
- NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+ __be32 id = htonl(ct->id);
+ NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
return 0;
nfattr_failure:
@@ -241,9 +241,9 @@ nfattr_failure:
static inline int
ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
{
- u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
+ __be32 use = htonl(atomic_read(&ct->ct_general.use));
- NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+ NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
return 0;
nfattr_failure:
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
/* dump everything */
events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS |
- IPCT_PROTOINFO |
- IPCT_HELPER |
- IPCT_HELPINFO |
- IPCT_NATINFO)) {
+ } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
@@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
goto nfattr_failure;
+ if (events & IPCT_MARK
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nfattr_failure;
+
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
return NOTIFY_DONE;
@@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0], *id);
read_lock_bh(&ip_conntrack_lock);
+ last = (struct ip_conntrack *)cb->args[1];
for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
restart:
- last = (struct ip_conntrack *)cb->args[1];
list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
h = (struct ip_conntrack_tuple_hash *) i;
if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = tuplehash_to_ctrack(h);
- if (last != NULL) {
- if (ct == last) {
- ip_conntrack_put(last);
- cb->args[1] = 0;
- last = NULL;
- } else
+ if (cb->args[1]) {
+ if (ct != last)
continue;
+ cb->args[1] = 0;
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -439,64 +436,29 @@ restart:
cb->args[1] = (unsigned long)ct;
goto out;
}
+#ifdef CONFIG_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+ IPCTNL_MSG_CT_GET_CTRZERO)
+ memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
}
- if (last != NULL) {
- ip_conntrack_put(last);
+ if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
read_unlock_bh(&ip_conntrack_lock);
+ if (last)
+ ip_conntrack_put(last);
DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
- return skb->len;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct = NULL;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[1];
-
- DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
- cb->args[0], *id);
-
- write_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (ct->id <= *id)
- continue;
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0)
- goto out;
- *id = ct->id;
-
- memset(&ct->counters, 0, sizeof(ct->counters));
- }
- }
-out:
- write_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
return skb->len;
}
-#endif
static const size_t cta_min_ip[CTA_IP_MAX] = {
- [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
- [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
+ [CTA_IP_V4_SRC-1] = sizeof(__be32),
+ [CTA_IP_V4_DST-1] = sizeof(__be32),
};
static inline int
@@ -513,11 +475,11 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
if (!tb[CTA_IP_V4_SRC-1])
return -EINVAL;
- tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+ tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
if (!tb[CTA_IP_V4_DST-1])
return -EINVAL;
- tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+ tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
DEBUGP("leaving\n");
@@ -640,8 +602,8 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
}
static const size_t cta_min_nat[CTA_NAT_MAX] = {
- [CTA_NAT_MINIP-1] = sizeof(u_int32_t),
- [CTA_NAT_MAXIP-1] = sizeof(u_int32_t),
+ [CTA_NAT_MINIP-1] = sizeof(__be32),
+ [CTA_NAT_MAXIP-1] = sizeof(__be32),
};
static inline int
@@ -661,12 +623,12 @@ ctnetlink_parse_nat(struct nfattr *nat,
return -EINVAL;
if (tb[CTA_NAT_MINIP-1])
- range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+ range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
if (!tb[CTA_NAT_MAXIP-1])
range->max_ip = range->min_ip;
else
- range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+ range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
if (range->min_ip)
range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -701,11 +663,11 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
}
static const size_t cta_min[CTA_MAX] = {
- [CTA_STATUS-1] = sizeof(u_int32_t),
- [CTA_TIMEOUT-1] = sizeof(u_int32_t),
- [CTA_MARK-1] = sizeof(u_int32_t),
- [CTA_USE-1] = sizeof(u_int32_t),
- [CTA_ID-1] = sizeof(u_int32_t)
+ [CTA_STATUS-1] = sizeof(__be32),
+ [CTA_TIMEOUT-1] = sizeof(__be32),
+ [CTA_MARK-1] = sizeof(__be32),
+ [CTA_USE-1] = sizeof(__be32),
+ [CTA_ID-1] = sizeof(__be32)
};
static int
@@ -744,7 +706,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
ct = tuplehash_to_ctrack(h);
if (cda[CTA_ID-1]) {
- u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+ u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
if (ct->id != id) {
ip_conntrack_put(ct);
return -ENOENT;
@@ -778,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (msg->nfgen_family != AF_INET)
return -EAFNOSUPPORT;
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_IP_NF_CT_ACCT
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table_w,
- ctnetlink_done)) != 0)
- return -EINVAL;
-#else
+#ifndef CONFIG_IP_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
return -ENOTSUPP;
#endif
- } else {
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
+ if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_dump_table,
+ ctnetlink_done)) != 0)
return -EINVAL;
- }
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
@@ -854,7 +808,7 @@ static inline int
ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
{
unsigned long d;
- unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
+ unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
d = ct->status ^ status;
if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -949,7 +903,7 @@ ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
static inline int
ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
{
- u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+ u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
if (!del_timer(&ct->timeout))
return -ETIME;
@@ -1012,7 +966,7 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+ ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
#endif
DEBUGP("all done\n");
@@ -1035,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (!cda[CTA_TIMEOUT-1])
goto err;
- ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+ ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
ct->status |= IPS_CONFIRMED;
@@ -1052,7 +1006,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+ ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
#endif
ct->helper = ip_conntrack_helper_find_get(rtuple);
@@ -1184,8 +1138,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct ip_conntrack_expect *exp)
{
struct ip_conntrack *master = exp->master;
- u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
- u_int32_t id = htonl(exp->id);
+ __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+ __be32 id = htonl(exp->id);
if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
goto nfattr_failure;
@@ -1196,8 +1150,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
CTA_EXPECT_MASTER) < 0)
goto nfattr_failure;
- NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
- NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
+ NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
+ NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
return 0;
@@ -1256,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
} else
return NOTIFY_DONE;
+ if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+ return NOTIFY_DONE;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
@@ -1315,8 +1272,8 @@ out:
}
static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
- [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t),
- [CTA_EXPECT_ID-1] = sizeof(u_int32_t)
+ [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32),
+ [CTA_EXPECT_ID-1] = sizeof(__be32)
};
static int
@@ -1364,7 +1321,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+ __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
if (exp->id != ntohl(id)) {
ip_conntrack_expect_put(exp);
return -ENOENT;
@@ -1418,8 +1375,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id =
- *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+ __be32 id =
+ *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
if (exp->id != ntohl(id)) {
ip_conntrack_expect_put(exp);
return -ENOENT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308b5e4c..36f2b5e5d80a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 21ee124c0463..5fe026f467d3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
/*
- * ip_conntrack_proto_gre.c - Version 3.0
+ * ip_conntrack_proto_gre.c - Version 3.0
*
* Connection tracking protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
* suited for NAT, as it has no protocol-specific part as port numbers.
*
- * It has an optional key field, which may help us distinguishing two
+ * It has an optional key field, which may help us distinguishing two
* connections between the same two hosts.
*
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
*
* PPTP is built on top of a modified version of GRE, and has a mandatory
* field called "CallID", which serves us for the same purpose as the key
@@ -23,7 +23,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
@@ -38,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock);
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -63,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
#define DEBUGP(x, args...)
#define DUMP_TUPLE_GRE(x)
#endif
-
+
/* GRE KEYMAP HANDLING FUNCTIONS */
static LIST_HEAD(gre_keymap_list);
@@ -83,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
__be16 key = 0;
read_lock_bh(&ip_ct_gre_lock);
- km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (km)
- key = km->tuple.src.u.gre.key;
+ list_for_each_entry(km, &gre_keymap_list, list) {
+ if (gre_key_cmpfn(km, t)) {
+ key = km->tuple.src.u.gre.key;
+ break;
+ }
+ }
read_unlock_bh(&ip_ct_gre_lock);
-
+
DEBUGP("lookup src key 0x%x up key for ", key);
DUMP_TUPLE_GRE(t);
@@ -100,28 +100,25 @@ int
ip_ct_gre_keymap_add(struct ip_conntrack *ct,
struct ip_conntrack_tuple *t, int reply)
{
- struct ip_ct_gre_keymap **exist_km, *km, *old;
+ struct ip_ct_gre_keymap **exist_km, *km;
if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
DEBUGP("refusing to add GRE keymap to non-pptp session\n");
return -1;
}
- if (!reply)
+ if (!reply)
exist_km = &ct->help.ct_pptp_info.keymap_orig;
else
exist_km = &ct->help.ct_pptp_info.keymap_reply;
if (*exist_km) {
/* check whether it's a retransmission */
- old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (old == *exist_km) {
- DEBUGP("retransmission\n");
- return 0;
+ list_for_each_entry(km, &gre_keymap_list, list) {
+ if (gre_key_cmpfn(km, t) && km == *exist_km)
+ return 0;
}
-
- DEBUGP("trying to override keymap_%s for ct %p\n",
+ DEBUGP("trying to override keymap_%s for ct %p\n",
reply? "reply":"orig", ct);
return -EEXIST;
}
@@ -137,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
DUMP_TUPLE_GRE(&km->tuple);
write_lock_bh(&ip_ct_gre_lock);
- list_append(&gre_keymap_list, km);
+ list_add_tail(&km->list, &gre_keymap_list);
write_unlock_bh(&ip_ct_gre_lock);
return 0;
@@ -155,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
write_lock_bh(&ip_ct_gre_lock);
if (ct->help.ct_pptp_info.keymap_orig) {
- DEBUGP("removing %p from list\n",
+ DEBUGP("removing %p from list\n",
ct->help.ct_pptp_info.keymap_orig);
list_del(&ct->help.ct_pptp_info.keymap_orig->list);
kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -223,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
static int gre_print_tuple(struct seq_file *s,
const struct ip_conntrack_tuple *tuple)
{
- return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
ntohs(tuple->src.u.gre.key),
ntohs(tuple->dst.u.gre.key));
}
@@ -253,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct,
} else
ip_ct_refresh_acct(ct, conntrackinfo, skb,
ct->proto.gre.timeout);
-
+
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int gre_new(struct ip_conntrack *ct,
const struct sk_buff *skb)
-{
+{
DEBUGP(": ");
DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
@@ -286,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct)
}
/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
+static struct ip_conntrack_protocol gre = {
.proto = IPPROTO_GRE,
- .name = "gre",
+ .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
.print_tuple = gre_print_tuple,
@@ -326,7 +323,7 @@ void ip_ct_proto_gre_fini(void)
}
write_unlock_bh(&ip_ct_gre_lock);
- ip_conntrack_protocol_unregister(&gre);
+ ip_conntrack_protocol_unregister(&gre);
}
EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c504586d..295b6fa340db 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
@@ -261,7 +261,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
static int icmp_tuple_to_nfattr(struct sk_buff *skb,
const struct ip_conntrack_tuple *t)
{
- NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+ NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
&t->src.u.icmp.id);
NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
&t->dst.u.icmp.type);
@@ -287,7 +287,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
tuple->dst.u.icmp.code =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
tuple->src.u.icmp.id =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
if (tuple->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[tuple->dst.u.icmp.type])
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 0416073c5600..2443322e4128 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
static const unsigned int * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
@@ -210,7 +210,7 @@ static int sctp_print_conntrack(struct seq_file *s,
for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
offset < skb->len && \
(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
- offset += (htons(sch->length) + 3) & ~3, count++)
+ offset += (ntohs(sch->length) + 3) & ~3, count++)
/* Some validity checks to make sure the chunks are fine */
static int do_basic_checks(struct ip_conntrack *conntrack,
@@ -254,7 +254,7 @@ static int do_basic_checks(struct ip_conntrack *conntrack,
}
DEBUGP("Basic checks passed\n");
- return 0;
+ return count == 0;
}
static int new_state(enum ip_conntrack_dir dir,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index c5c2ce5cdeb8..06e4e8a6dd9f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -19,7 +19,6 @@
* version 2.2
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
@@ -49,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal = 0;
+int ip_ct_tcp_be_liberal __read_mostly = 0;
/* When connection is picked up from the middle, how many packets are required
to pass in each direction when we assume we are in sync - if any side uses
window scaling, we lost the game.
If it is set to zero, we disable picking up already established
connections. */
-int ip_ct_tcp_loose = 3;
+int ip_ct_tcp_loose __read_mostly = 3;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
-int ip_ct_tcp_max_retrans = 3;
+int ip_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
@@ -84,19 +83,19 @@ static const char *tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close = 10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
+unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
static const unsigned int * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
@@ -520,8 +519,8 @@ static void tcp_sack(const struct sk_buff *skb,
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__u32 *)ptr ==
- __constant_ntohl((TCPOPT_NOP << 24)
+ && *(__be32 *)ptr ==
+ __constant_htonl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP))
@@ -552,7 +551,7 @@ static void tcp_sack(const struct sk_buff *skb,
for (i = 0;
i < (opsize - TCPOLEN_SACK_BASE);
i += TCPOLEN_SACK_PERBLOCK) {
- tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+ tmp = ntohl(*((__be32 *)(ptr+i)+1));
if (after(tmp, *sack))
*sack = tmp;
@@ -732,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
- && state->last_end == end)
+ && state->last_end == end
+ && state->last_win == win)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
+ state->last_win = win;
state->retrans = 0;
}
}
@@ -866,8 +867,7 @@ static int tcp_error(struct sk_buff *skb,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because it is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b4d2ff..d0e8a16970ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_udp_timeout = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index fc87ce0da40d..f4f75995a9e4 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/skbuff.h>
@@ -194,7 +193,7 @@ static int skp_digits_len(const char *dptr, const char *limit, int *shift)
/* Simple ipaddr parser.. */
static int parse_ipaddr(const char *cp, const char **endp,
- u_int32_t *ipaddr, const char *limit)
+ __be32 *ipaddr, const char *limit)
{
unsigned long int val;
int i, digit = 0;
@@ -228,7 +227,7 @@ static int parse_ipaddr(const char *cp, const char **endp,
static int epaddr_len(const char *dptr, const char *limit, int *shift)
{
const char *aux = dptr;
- u_int32_t ip;
+ __be32 ip;
if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
DEBUGP("ip: %s parse failed.!\n", dptr);
@@ -303,7 +302,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen,
static int set_expected_rtp(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
- u_int32_t ipaddr, u_int16_t port,
+ __be32 ipaddr, u_int16_t port,
const char *dptr)
{
struct ip_conntrack_expect *exp;
@@ -320,10 +319,10 @@ static int set_expected_rtp(struct sk_buff **pskb,
exp->tuple.dst.u.udp.port = htons(port);
exp->tuple.dst.protonum = IPPROTO_UDP;
- exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.ip = htonl(0xFFFFFFFF);
exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.udp.port = 0xFFFF;
+ exp->mask.dst.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u.udp.port = htons(0xFFFF);
exp->mask.dst.protonum = 0xFF;
exp->expectfn = NULL;
@@ -350,7 +349,7 @@ static int sip_help(struct sk_buff **pskb,
const char *dptr;
int ret = NF_ACCEPT;
int matchoff, matchlen;
- u_int32_t ipaddr;
+ __be32 ipaddr;
u_int16_t port;
/* No Data ? */
@@ -440,9 +439,9 @@ static int __init init(void)
sip[i].tuple.dst.protonum = IPPROTO_UDP;
sip[i].tuple.src.u.udp.port = htons(ports[i]);
- sip[i].mask.src.u.udp.port = 0xFFFF;
+ sip[i].mask.src.u.udp.port = htons(0xFFFF);
sip[i].mask.dst.protonum = 0xFF;
- sip[i].max_expected = 1;
+ sip[i].max_expected = 2;
sip[i].timeout = 3 * 60; /* 3 minutes */
sip[i].me = THIS_MODULE;
sip[i].help = sip_help;
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 88445aac3f28..02135756562e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -12,7 +12,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
@@ -36,7 +35,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -535,6 +533,8 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
/* Sysctl support */
+int ip_conntrack_checksum __read_mostly = 1;
+
#ifdef CONFIG_SYSCTL
/* From ip_conntrack_core.c */
@@ -562,15 +562,13 @@ extern unsigned int ip_ct_udp_timeout_stream;
/* From ip_conntrack_proto_icmp.c */
extern unsigned int ip_ct_icmp_timeout;
-/* From ip_conntrack_proto_icmp.c */
+/* From ip_conntrack_proto_generic.c */
extern unsigned int ip_ct_generic_timeout;
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-int ip_conntrack_checksum = 1;
-
static struct ctl_table_header *ip_ct_sysctl_header;
static ctl_table ip_ct_sysctl_table[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index 7e33d3bed5e3..fe0b634dd377 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -70,10 +70,10 @@ static int tftp_help(struct sk_buff **pskb,
return NF_DROP;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->mask.src.ip = 0xffffffff;
+ exp->mask.src.ip = htonl(0xffffffff);
exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = 0xffffffff;
- exp->mask.dst.u.udp.port = 0xffff;
+ exp->mask.dst.ip = htonl(0xffffffff);
+ exp->mask.dst.u.udp.port = htons(0xffff);
exp->mask.dst.protonum = 0xff;
exp->expectfn = NULL;
exp->flags = 0;
@@ -129,7 +129,7 @@ static int __init ip_conntrack_tftp_init(void)
tftp[i].tuple.dst.protonum = IPPROTO_UDP;
tftp[i].tuple.src.u.udp.port = htons(ports[i]);
tftp[i].mask.dst.protonum = 0xFF;
- tftp[i].mask.src.u.udp.port = 0xFFFF;
+ tftp[i].mask.src.u.udp.port = htons(0xFFFF);
tftp[i].max_expected = 1;
tftp[i].timeout = 5 * 60; /* 5 minutes */
tftp[i].me = THIS_MODULE;
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d555ad0d..4b6260a97408 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
#include <linux/udp.h>
#include <linux/jhash.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -86,7 +82,7 @@ static inline unsigned int
hash_by_src(const struct ip_conntrack_tuple *tuple)
{
/* Original src, to ensure we map it consistently if poss. */
- return jhash_3words(tuple->src.ip, tuple->src.u.all,
+ return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
tuple->dst.protonum, 0) % ip_nat_htable_size;
}
@@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
write_unlock_bh(&ip_nat_lock);
}
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong. Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
/* Is this tuple already taken? (not by us) */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -206,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *conntrack,
enum ip_nat_manip_type maniptype)
{
- u_int32_t *var_ipp;
+ __be32 *var_ipp;
/* Host order */
u_int32_t minip, maxip, j;
@@ -233,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
* like this), even across reboots. */
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
- j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
+ j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
*var_ipp = htonl(minip + j % (maxip - minip + 1));
}
@@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
- iph->check);
+ iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+ iph->check);
iph->saddr = target->src.ip;
} else {
- iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
- iph->check);
+ iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+ iph->check);
iph->daddr = target->dst.ip;
}
return 1;
@@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
EXPORT_SYMBOL_GPL(ip_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
struct {
struct icmphdr icmp;
@@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
} *inside;
struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
+ enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
@@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
- hdrlen = (*pskb)->nh.iph->ihl * 4;
- if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0)))
- return 0;
- }
+ if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ return 0;
/* Must be RELATED */
IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
!manip))
return 0;
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt inner. */
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen,
+ 0));
+ }
/* Change outer to look the reply to an incoming packet
* (proto 0 means don't invert per-proto part). */
@@ -550,9 +534,9 @@ int
ip_nat_port_range_to_nfattr(struct sk_buff *skb,
const struct ip_nat_range *range)
{
- NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
+ NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
&range->min.tcp.port);
- NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
+ NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
&range->max.tcp.port);
return 0;
@@ -571,7 +555,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
if (tb[CTA_PROTONAT_PORT_MIN-1]) {
ret = 1;
range->min.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
}
if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
@@ -580,7 +564,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
} else {
ret = 1;
range->max.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
}
return ret;
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 3328fc5c5f50..a71c233d8112 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -34,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper");
static int
mangle_rfc959_packet(struct sk_buff **pskb,
- u_int32_t newip,
+ __be32 newip,
u_int16_t port,
unsigned int matchoff,
unsigned int matchlen,
@@ -57,7 +57,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
/* |1|132.235.1.2|6275| */
static int
mangle_eprt_packet(struct sk_buff **pskb,
- u_int32_t newip,
+ __be32 newip,
u_int16_t port,
unsigned int matchoff,
unsigned int matchlen,
@@ -79,7 +79,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
/* |1|132.235.1.2|6275| */
static int
mangle_epsv_packet(struct sk_buff **pskb,
- u_int32_t newip,
+ __be32 newip,
u_int16_t port,
unsigned int matchoff,
unsigned int matchlen,
@@ -98,7 +98,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
matchlen, buffer, strlen(buffer));
}
-static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
+static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
unsigned int,
unsigned int,
struct ip_conntrack *,
@@ -120,7 +120,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
struct ip_conntrack_expect *exp,
u32 *seq)
{
- u_int32_t newip;
+ __be32 newip;
u_int16_t port;
int dir = CTINFO2DIR(ctinfo);
struct ip_conntrack *ct = exp->master;
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 5d506e0564d5..3bf858480558 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -15,7 +15,6 @@
* - make ip_nat_resize_packet more generic (TCP and UDP)
* - add ip_nat_mangle_udp_packet
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/types.h>
@@ -28,16 +27,12 @@
#include <net/tcp.h>
#include <net/udp.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -166,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
{
struct iphdr *iph;
struct tcphdr *tcph;
- int datalen;
+ int oldlen, datalen;
if (!skb_make_writable(pskb, (*pskb)->len))
return 0;
@@ -181,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
iph = (*pskb)->nh.iph;
tcph = (void *)iph + iph->ihl*4;
+ oldlen = (*pskb)->len - iph->ihl*4;
mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
datalen = (*pskb)->len - iph->ihl*4;
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
- csum_partial((char *)tcph, datalen, 0));
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, datalen,
+ iph->saddr, iph->daddr,
+ csum_partial((char *)tcph,
+ datalen, 0));
+ } else
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(oldlen) ^ htons(0xFFFF),
+ htons(datalen),
+ tcph->check, 1);
if (rep_len != match_len) {
set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -222,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
{
struct iphdr *iph;
struct udphdr *udph;
+ int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
iph = (*pskb)->nh.iph;
@@ -239,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
iph = (*pskb)->nh.iph;
udph = (void *)iph + iph->ihl*4;
+
+ oldlen = (*pskb)->len - iph->ihl*4;
mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* update the length of the UDP packet */
- udph->len = htons((*pskb)->len - iph->ihl*4);
+ datalen = (*pskb)->len - iph->ihl*4;
+ udph->len = htons(datalen);
/* fix udp checksum if udp checksum was previously calculated */
- if (udph->check) {
- int datalen = (*pskb)->len - iph->ihl * 4;
+ if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+ return 1;
+
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
udph->check = 0;
udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
datalen, IPPROTO_UDP,
csum_partial((char *)udph,
datalen, 0));
- }
-
+ if (!udph->check)
+ udph->check = -1;
+ } else
+ udph->check = nf_proto_csum_update(*pskb,
+ htons(oldlen) ^ htons(0xFFFF),
+ htons(datalen),
+ udph->check, 1);
return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -268,37 +283,38 @@ sack_adjust(struct sk_buff *skb,
struct ip_nat_seq *natseq)
{
while (sackoff < sackend) {
- struct tcp_sack_block *sack;
- u_int32_t new_start_seq, new_end_seq;
+ struct tcp_sack_block_wire *sack;
+ __be32 new_start_seq, new_end_seq;
sack = (void *)skb->data + sackoff;
if (after(ntohl(sack->start_seq) - natseq->offset_before,
natseq->correction_pos))
- new_start_seq = ntohl(sack->start_seq)
- - natseq->offset_after;
+ new_start_seq = htonl(ntohl(sack->start_seq)
+ - natseq->offset_after);
else
- new_start_seq = ntohl(sack->start_seq)
- - natseq->offset_before;
- new_start_seq = htonl(new_start_seq);
+ new_start_seq = htonl(ntohl(sack->start_seq)
+ - natseq->offset_before);
if (after(ntohl(sack->end_seq) - natseq->offset_before,
natseq->correction_pos))
- new_end_seq = ntohl(sack->end_seq)
- - natseq->offset_after;
+ new_end_seq = htonl(ntohl(sack->end_seq)
+ - natseq->offset_after);
else
- new_end_seq = ntohl(sack->end_seq)
- - natseq->offset_before;
- new_end_seq = htonl(new_end_seq);
+ new_end_seq = htonl(ntohl(sack->end_seq)
+ - natseq->offset_before);
DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
ntohl(sack->start_seq), new_start_seq,
ntohl(sack->end_seq), new_end_seq);
- tcph->check =
- ip_nat_cheat_check(~sack->start_seq, new_start_seq,
- ip_nat_cheat_check(~sack->end_seq,
- new_end_seq,
- tcph->check));
+ tcph->check = nf_proto_csum_update(skb,
+ ~sack->start_seq,
+ new_start_seq,
+ tcph->check, 0);
+ tcph->check = nf_proto_csum_update(skb,
+ ~sack->end_seq,
+ new_end_seq,
+ tcph->check, 0);
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
@@ -357,7 +373,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo)
{
struct tcphdr *tcph;
- int dir, newseq, newack;
+ int dir;
+ __be32 newseq, newack;
struct ip_nat_seq *this_way, *other_way;
dir = CTINFO2DIR(ctinfo);
@@ -370,22 +387,20 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
if (after(ntohl(tcph->seq), this_way->correction_pos))
- newseq = ntohl(tcph->seq) + this_way->offset_after;
+ newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
else
- newseq = ntohl(tcph->seq) + this_way->offset_before;
- newseq = htonl(newseq);
+ newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
other_way->correction_pos))
- newack = ntohl(tcph->ack_seq) - other_way->offset_after;
+ newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
else
- newack = ntohl(tcph->ack_seq) - other_way->offset_before;
- newack = htonl(newack);
+ newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
- tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
- ip_nat_cheat_check(~tcph->ack_seq,
- newack,
- tcph->check));
+ tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+ tcph->check, 0);
+ tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+ tcph->check, 0);
DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
index 419b878fb467..4a7d34466ee2 100644
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ b/net/ipv4/netfilter/ip_nat_helper_h323.c
@@ -32,13 +32,13 @@
/****************************************************************************/
static int set_addr(struct sk_buff **pskb,
unsigned char **data, int dataoff,
- unsigned int addroff, u_int32_t ip, u_int16_t port)
+ unsigned int addroff, __be32 ip, u_int16_t port)
{
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
struct {
- u_int32_t ip;
- u_int16_t port;
+ __be32 ip;
+ __be16 port;
} __attribute__ ((__packed__)) buf;
struct tcphdr _tcph, *th;
@@ -86,7 +86,7 @@ static int set_addr(struct sk_buff **pskb,
static int set_h225_addr(struct sk_buff **pskb,
unsigned char **data, int dataoff,
TransportAddress * addr,
- u_int32_t ip, u_int16_t port)
+ __be32 ip, u_int16_t port)
{
return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
}
@@ -95,7 +95,7 @@ static int set_h225_addr(struct sk_buff **pskb,
static int set_h245_addr(struct sk_buff **pskb,
unsigned char **data, int dataoff,
H245_TransportAddress * addr,
- u_int32_t ip, u_int16_t port)
+ __be32 ip, u_int16_t port)
{
return set_addr(pskb, data, dataoff,
addr->unicastAddress.iPAddress.network, ip, port);
@@ -110,7 +110,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
int dir = CTINFO2DIR(ctinfo);
int i;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
for (i = 0; i < count; i++) {
@@ -164,7 +164,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
{
int dir = CTINFO2DIR(ctinfo);
int i;
- u_int32_t ip;
+ __be32 ip;
u_int16_t port;
for (i = 0; i < count; i++) {
@@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
int dir = CTINFO2DIR(ctinfo);
u_int16_t nated_port = port;
- u_int32_t ip;
+ __be32 ip;
/* Set expectations for NAT */
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index f3977726ff09..329fdcd7d702 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,10 +32,9 @@
* 2005-06-10 - Version 3.0
* - kernel >= 2.6.11 version,
* funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
+ *
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/tcp.h>
@@ -52,7 +51,7 @@
#define IP_NAT_PPTP_VERSION "3.0"
-#define REQ_CID(req, off) (*(u_int16_t *)((char *)(req) + (off)))
+#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
@@ -86,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
DEBUGP("we are PNS->PAC\n");
/* therefore, build tuple for PAC->PNS */
t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+ t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+ t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
t.dst.protonum = IPPROTO_GRE;
} else {
DEBUGP("we are PAC->PNS\n");
/* build tuple for PNS->PAC */
t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- t.src.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pns_call_id);
+ t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- t.dst.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pac_call_id);
+ t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
t.dst.protonum = IPPROTO_GRE;
}
@@ -150,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb,
{
struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_callid;
+ u_int16_t msg;
+ __be16 new_callid;
unsigned int cid_off;
- new_callid = htons(ct_pptp_info->pns_call_id);
-
+ new_callid = ct_pptp_info->pns_call_id;
+
switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
- /* FIXME: ideally we would want to reserve a call ID
- * here. current netfilter NAT core is not able to do
- * this :( For now we use TCP source port. This breaks
- * multiple calls within one control session */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = ntohs(new_callid);
- break;
- case PPTP_IN_CALL_REPLY:
- cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+ (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through */
+
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
}
/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -213,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb,
return NF_ACCEPT;
}
-static int
+static void
pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
struct ip_conntrack_expect *expect_reply)
{
- struct ip_ct_pptp_master *ct_pptp_info =
- &expect_orig->master->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info =
- &expect_orig->master->nat.help.nat_pptp_info;
-
struct ip_conntrack *ct = expect_orig->master;
-
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_tuple *orig_t, *reply_t;
+ struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+ struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
/* save original PAC call ID in nat_info */
nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
- /* alter expectation */
- orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
/* alter expectation for PNS->PAC direction */
- invert_tuplepr(&inv_t, &expect_orig->tuple);
- expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
- expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+ expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+ expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
expect_orig->dir = IP_CT_DIR_ORIGINAL;
- inv_t.src.ip = reply_t->src.ip;
- inv_t.dst.ip = reply_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
-
- if (!ip_conntrack_expect_related(expect_orig)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_orig)\n");
- return 1;
- }
/* alter expectation for PAC->PNS direction */
- invert_tuplepr(&inv_t, &expect_reply->tuple);
- expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+ expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+ expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
expect_reply->dir = IP_CT_DIR_REPLY;
- inv_t.src.ip = orig_t->src.ip;
- inv_t.dst.ip = orig_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
-
- if (!ip_conntrack_expect_related(expect_reply)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_reply)\n");
- ip_conntrack_unexpect_related(expect_orig);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
- DEBUGP("can't register original keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
- DEBUGP("can't register reply keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- ip_ct_gre_keymap_destroy(ct);
- return 1;
- }
-
- return 0;
}
/* inbound packets == from PAC to PNS */
@@ -298,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb,
union pptp_ctrl_union *pptpReq)
{
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_cid = 0, new_pcid;
- unsigned int pcid_off, cid_off = 0;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
- new_pcid = htons(nat_pptp_info->pns_call_id);
+ new_pcid = nat_pptp_info->pns_call_id;
switch (msg = ntohs(ctlh->messageType)) {
case PPTP_OUT_CALL_REPLY:
pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
- cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
break;
case PPTP_IN_CALL_CONNECT:
pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -325,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
default:
- DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
+ DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
pptp_msg_name[msg]:pptp_msg_name[0]);
/* fall through */
@@ -352,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
sizeof(new_pcid), (char *)&new_pcid,
sizeof(new_pcid)) == 0)
return NF_DROP;
-
- if (new_cid) {
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_cid), (char *)&new_cid,
- sizeof(new_cid)) == 0)
- return NF_DROP;
- }
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 96ceabaec402..bf91f9312b3c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
* GRE is a generic encapsulation protocol, which is generally not very
* suited for NAT, as it has no protocol-specific part as port numbers.
*
- * It has an optional key field, which may help us distinguishing two
+ * It has an optional key field, which may help us distinguishing two
* connections between the same two hosts.
*
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
*
* PPTP is built on top of a modified version of GRE, and has a mandatory
* field called "CallID", which serves us for the same purpose as the key
@@ -23,7 +23,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/netfilter_ipv4/ip_nat.h>
@@ -61,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
}
/* generate unique tuple ... */
-static int
+static int
gre_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range,
enum ip_nat_manip_type maniptype,
const struct ip_conntrack *conntrack)
{
static u_int16_t key;
- u_int16_t *keyptr;
+ __be16 *keyptr;
unsigned int min, i, range_size;
if (maniptype == IP_NAT_MANIP_SRC)
@@ -85,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
range_size = ntohs(range->max.gre.key) - min + 1;
}
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
+ DEBUGP("min = %u, range_size = %u\n", min, range_size);
for (i = 0; i < range_size; i++, key++) {
*keyptr = htons(min + key % range_size);
@@ -118,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb,
greh = (void *)(*pskb)->data + hdroff;
pgreh = (struct gre_hdr_pptp *) greh;
- /* we only have destination manip of a packet, since 'source key'
+ /* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype == IP_NAT_MANIP_DST) {
/* key manipulation is always dest */
@@ -130,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb,
}
if (greh->csum) {
/* FIXME: Never tested this code... */
- *(gre_csum(greh)) =
- ip_nat_cheat_check(~*(gre_key(greh)),
+ *(gre_csum(greh)) =
+ nf_proto_csum_update(*pskb,
+ ~*(gre_key(greh)),
tuple->dst.u.gre.key,
- *(gre_csum(greh)));
+ *(gre_csum(greh)), 0);
}
*(gre_key(greh)) = tuple->dst.u.gre.key;
break;
case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
+ DEBUGP("call_id -> 0x%04x\n",
ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
@@ -152,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb,
}
/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
+static struct ip_nat_protocol gre = {
+ .name = "GRE",
.protonum = IPPROTO_GRE,
.manip_pkt = gre_manip_pkt,
.in_range = gre_in_range,
@@ -164,7 +164,7 @@ static struct ip_nat_protocol gre = {
.nfattr_to_range = ip_nat_port_nfattr_to_range,
#endif
};
-
+
int __init ip_nat_proto_gre_init(void)
{
return ip_nat_protocol_register(&gre);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4ccb99c..3f6efc13ac74 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
return 0;
hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
- hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
- tuple->src.u.icmp.id,
- hdr->checksum);
+ hdr->checksum = nf_proto_csum_update(*pskb,
+ hdr->un.echo.id ^ htons(0xFFFF),
+ tuple->src.u.icmp.id,
+ hdr->checksum, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d14079eba6..12deb13b93b1 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -24,7 +24,7 @@ tcp_in_range(const struct ip_conntrack_tuple *tuple,
const union ip_conntrack_manip_proto *min,
const union ip_conntrack_manip_proto *max)
{
- u_int16_t port;
+ __be16 port;
if (maniptype == IP_NAT_MANIP_SRC)
port = tuple->src.u.tcp.port;
@@ -42,7 +42,7 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *conntrack)
{
static u_int16_t port;
- u_int16_t *portptr;
+ __be16 *portptr;
unsigned int range_size, min, i;
if (maniptype == IP_NAT_MANIP_SRC)
@@ -93,8 +93,8 @@ tcp_manip_pkt(struct sk_buff **pskb,
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, newip;
- u16 *portptr, newport, oldport;
+ __be32 oldip, newip;
+ __be16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (hdrsize < sizeof(*hdr))
return 1;
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
+ hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+ hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport,
+ hdr->check, 0);
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053fdc867..4bbec7730d18 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -24,7 +24,7 @@ udp_in_range(const struct ip_conntrack_tuple *tuple,
const union ip_conntrack_manip_proto *min,
const union ip_conntrack_manip_proto *max)
{
- u_int16_t port;
+ __be16 port;
if (maniptype == IP_NAT_MANIP_SRC)
port = tuple->src.u.udp.port;
@@ -42,7 +42,7 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *conntrack)
{
static u_int16_t port;
- u_int16_t *portptr;
+ __be16 *portptr;
unsigned int range_size, min, i;
if (maniptype == IP_NAT_MANIP_SRC)
@@ -91,8 +91,8 @@ udp_manip_pkt(struct sk_buff **pskb,
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, newip;
- u16 *portptr, newport;
+ __be32 oldip, newip;
+ __be16 *portptr, newport;
if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
- if (hdr->check) /* 0 is a special case meaning no checksum */
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
+
+ if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+ hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+ hdr->check, 1);
+ hdr->check = nf_proto_csum_update(*pskb,
+ *portptr ^ htons(0xFFFF), newport,
+ hdr->check, 0);
+ if (!hdr->check)
+ hdr->check = -1;
+ }
*portptr = newport;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926c1cb0..a176aa3031e0 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
#include <net/route.h>
#include <linux/bitops.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct ipt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -124,7 +119,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
}
/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
{
static int warned = 0;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
@@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct ipt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename,
const void *entry,
const struct ipt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ip_nat_multi_range_compat *mr = targinfo;
@@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename,
const void *entry,
const struct ipt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ip_nat_multi_range_compat *mr = targinfo;
@@ -213,7 +205,7 @@ alloc_null_binding(struct ip_conntrack *conntrack,
per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
Use reply in case it's already been mangled (eg local packet).
*/
- u_int32_t ip
+ __be32 ip
= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
: conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
@@ -230,7 +222,7 @@ alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
struct ip_nat_info *info,
unsigned int hooknum)
{
- u_int32_t ip
+ __be32 ip
= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
: conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
@@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
{
int ret;
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+ ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
if (ret == NF_ACCEPT) {
if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
index 6ffba63adca2..71fc2730a007 100644
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ b/net/ipv4/netfilter/ip_nat_sip.c
@@ -60,8 +60,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
unsigned int bufflen, dataoff;
- u_int32_t ip;
- u_int16_t port;
+ __be32 ip;
+ __be16 port;
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
@@ -159,7 +159,7 @@ static int mangle_content_len(struct sk_buff **pskb,
static unsigned int mangle_sdp(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
struct ip_conntrack *ct,
- u_int32_t newip, u_int16_t port,
+ __be32 newip, u_int16_t port,
const char *dptr)
{
char buffer[sizeof("nnn.nnn.nnn.nnn")];
@@ -195,7 +195,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
{
struct ip_conntrack *ct = exp->master;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- u_int32_t newip;
+ __be32 newip;
u_int16_t port;
DEBUGP("ip_nat_sdp():\n");
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index d20d557f915a..168f45fa1898 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -43,7 +43,6 @@
* 2000-08-06: Convert to new helper API (Harald Welte).
*
*/
-#include <linux/config.h>
#include <linux/in.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -1212,7 +1211,7 @@ static int snmp_translate(struct ip_conntrack *ct,
struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
int dir = CTINFO2DIR(ctinfo);
@@ -1256,9 +1255,9 @@ static int help(struct sk_buff **pskb,
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
return NF_ACCEPT;
- if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT;
/* No NAT? */
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 67e676783da9..021395b67463 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -18,7 +18,6 @@
* - now capable of multiple expectations for one master
* */
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
@@ -31,9 +30,6 @@
#include <net/checksum.h>
#include <linux/spinlock.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -41,7 +37,6 @@
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -111,11 +106,6 @@ ip_nat_fn(unsigned int hooknum,
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
& htons(IP_MF|IP_OFFSET)));
- /* If we had a hardware checksum before, it's now invalid */
- if ((*pskb)->ip_summed == CHECKSUM_HW)
- if (skb_checksum_help(*pskb, (out == NULL)))
- return NF_DROP;
-
ct = ip_conntrack_get(*pskb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
have dropped it. Hence it's the user's responsibilty to
@@ -146,8 +136,8 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
- CTINFO2DIR(ctinfo)))
+ if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+ hooknum, pskb))
return NF_DROP;
else
return NF_ACCEPT;
@@ -201,7 +191,7 @@ ip_nat_in(unsigned int hooknum,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
- u_int32_t daddr = (*pskb)->nh.iph->daddr;
+ __be32 daddr = (*pskb)->nh.iph->daddr;
ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index b93f0494362f..7edad790478a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@ struct ipq_queue_entry {
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
break;
case IPQ_COPY_PACKET:
- if (entry->skb->ip_summed == CHECKSUM_HW &&
- (*errp = skb_checksum_help(entry->skb,
- entry->info->outdev == NULL))) {
+ if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+ entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+ (*errp = skb_checksum_help(entry->skb))) {
read_unlock_bh(&queue_lock);
return NULL;
}
@@ -457,11 +457,19 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
if (entry->info->indev)
if (entry->info->indev->ifindex == ifindex)
return 1;
-
if (entry->info->outdev)
if (entry->info->outdev->ifindex == ifindex)
return 1;
-
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (entry->skb->nf_bridge) {
+ if (entry->skb->nf_bridge->physindev &&
+ entry->skb->nf_bridge->physindev->ifindex == ifindex)
+ return 1;
+ if (entry->skb->nf_bridge->physoutdev &&
+ entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ return 1;
+ }
+#endif
return 0;
}
@@ -507,7 +515,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (type <= IPQM_BASE)
return;
- if (security_netlink_recv(skb))
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
write_lock_bh(&queue_lock);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index cee3397ec277..78a44b01c035 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -14,7 +14,6 @@
* 08 Oct 2005 Harald Welte <lafore@netfilter.org>
* - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
*/
-#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
@@ -181,8 +180,7 @@ ipt_error(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
if (net_ratelimit())
printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -218,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct ipt_table *table,
- void *userdata)
+ struct ipt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
u_int16_t offset;
@@ -231,7 +228,7 @@ ipt_do_table(struct sk_buff **pskb,
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
- struct xt_table_info *private = table->private;
+ struct xt_table_info *private;
/* Initialization */
ip = (*pskb)->nh.iph;
@@ -248,6 +245,7 @@ ipt_do_table(struct sk_buff **pskb,
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ private = table->private;
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb,
in, out,
hook,
t->u.kernel.target,
- t->data,
- userdata);
+ t->data);
#ifdef CONFIG_NETFILTER_DEBUG
if (((struct ipt_entry *)table_base)->comefrom
@@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
return 1;
if (m->u.kernel.match->destroy)
- m->u.kernel.match->destroy(m->u.kernel.match, m->data,
- m->u.match_size - sizeof(*m));
+ m->u.kernel.match->destroy(m->u.kernel.match, m->data);
module_put(m->u.kernel.match->me);
return 0;
}
@@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m,
if (m->u.kernel.match->checkentry
&& !m->u.kernel.match->checkentry(name, ip, match, m->data,
- m->u.match_size - sizeof(*m),
hookmask)) {
duprintf("ip_tables: check failed for `%s'.\n",
m->u.kernel.match->name);
@@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
if (t->u.kernel.target == &ipt_standard_target) {
if (!standard_check(t, size)) {
ret = -EINVAL;
- goto cleanup_matches;
+ goto err;
}
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
- t->u.target_size
- - sizeof(*t),
e->comefrom)) {
duprintf("ip_tables: check failed for `%s'.\n",
t->u.kernel.target->name);
@@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
IPT_MATCH_ITERATE(e, cleanup_match, NULL);
t = ipt_get_target(e);
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
return 0;
}
@@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset)
return delta;
}
-struct compat_ipt_standard_target
+static void compat_standard_from_user(void *dst, void *src)
{
- struct compat_xt_entry_target target;
- compat_int_t verdict;
-};
+ int v = *(compat_int_t *)src;
-struct compat_ipt_standard
-{
- struct compat_ipt_entry entry;
- struct compat_ipt_standard_target target;
-};
-
-#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target))
-#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
-#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+ if (v > 0)
+ v += compat_calc_jump(v);
+ memcpy(dst, &v, sizeof(v));
+}
-static int compat_ipt_standard_fn(void *target,
- void **dstptr, int *size, int convert)
+static int compat_standard_to_user(void __user *dst, void *src)
{
- struct compat_ipt_standard_target compat_st, *pcompat_st;
- struct ipt_standard_target st, *pst;
- int ret;
+ compat_int_t cv = *(int *)src;
- ret = 0;
- switch (convert) {
- case COMPAT_TO_USER:
- pst = target;
- memcpy(&compat_st.target, &pst->target,
- sizeof(compat_st.target));
- compat_st.verdict = pst->verdict;
- if (compat_st.verdict > 0)
- compat_st.verdict -=
- compat_calc_jump(compat_st.verdict);
- compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
- if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
- ret = -EFAULT;
- *size -= IPT_ST_OFFSET;
- *dstptr += IPT_ST_COMPAT_LEN;
- break;
- case COMPAT_FROM_USER:
- pcompat_st = target;
- memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
- st.verdict = pcompat_st->verdict;
- if (st.verdict > 0)
- st.verdict += compat_calc_jump(st.verdict);
- st.target.u.user.target_size = IPT_ST_LEN;
- memcpy(*dstptr, &st, IPT_ST_LEN);
- *size += IPT_ST_OFFSET;
- *dstptr += IPT_ST_LEN;
- break;
- case COMPAT_CALC_SIZE:
- *size += IPT_ST_OFFSET;
- break;
- default:
- ret = -ENOPROTOOPT;
- break;
- }
- return ret;
+ if (cv > 0)
+ cv -= compat_calc_jump(cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}
static inline int
compat_calc_match(struct ipt_entry_match *m, int * size)
{
- if (m->u.kernel.match->compat)
- m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
- else
- xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+ *size += xt_compat_match_offset(m->u.kernel.match);
return 0;
}
@@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
entry_offset = (void *)e - base;
IPT_MATCH_ITERATE(e, compat_calc_match, &off);
t = ipt_get_target(e);
- if (t->u.kernel.target->compat)
- t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
- else
- xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+ off += xt_compat_target_offset(t->u.kernel.target);
newinfo->size -= off;
ret = compat_add_offset(entry_offset, off);
if (ret)
@@ -1422,17 +1366,13 @@ struct compat_ipt_replace {
static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
void __user **dstptr, compat_uint_t *size)
{
- if (m->u.kernel.match->compat)
- return m->u.kernel.match->compat(m, dstptr, size,
- COMPAT_TO_USER);
- else
- return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
+ return xt_compat_match_to_user(m, dstptr, size);
}
static int compat_copy_entry_to_user(struct ipt_entry *e,
void __user **dstptr, compat_uint_t *size)
{
- struct ipt_entry_target __user *t;
+ struct ipt_entry_target *t;
struct compat_ipt_entry __user *ce;
u_int16_t target_offset, next_offset;
compat_uint_t origsize;
@@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
if (ret)
goto out;
t = ipt_get_target(e);
- if (t->u.kernel.target->compat)
- ret = t->u.kernel.target->compat(t, dstptr, size,
- COMPAT_TO_USER);
- else
- ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
+ ret = xt_compat_target_to_user(t, dstptr, size);
if (ret)
goto out;
ret = -EFAULT;
@@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
return match ? PTR_ERR(match) : -ENOENT;
}
m->u.kernel.match = match;
-
- if (m->u.kernel.match->compat)
- m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
- else
- xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+ *size += xt_compat_match_offset(match);
(*i)++;
return 0;
@@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
e->comefrom, &off, &j);
if (ret != 0)
- goto out;
+ goto cleanup_matches;
t = ipt_get_target(e);
target = try_then_request_module(xt_find_target(AF_INET,
@@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
if (IS_ERR(target) || !target) {
duprintf("check_entry: `%s' not found\n", t->u.user.name);
ret = target ? PTR_ERR(target) : -ENOENT;
- goto out;
+ goto cleanup_matches;
}
t->u.kernel.target = target;
- if (t->u.kernel.target->compat)
- t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
- else
- xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+ off += xt_compat_target_offset(target);
*size += off;
ret = compat_add_offset(entry_offset, off);
if (ret)
@@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
(*i)++;
return 0;
+
out:
+ module_put(t->u.kernel.target->me);
+cleanup_matches:
IPT_MATCH_ITERATE(e, cleanup_match, &j);
return ret;
}
static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
void **dstptr, compat_uint_t *size, const char *name,
- const struct ipt_ip *ip, unsigned int hookmask)
+ const struct ipt_ip *ip, unsigned int hookmask, int *i)
{
struct ipt_entry_match *dm;
struct ipt_match *match;
@@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
dm = (struct ipt_entry_match *)*dstptr;
match = m->u.kernel.match;
- if (match->compat)
- match->compat(m, dstptr, size, COMPAT_FROM_USER);
- else
- xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
+ xt_compat_match_from_user(m, dstptr, size);
ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
name, hookmask, ip->proto,
ip->invflags & IPT_INV_PROTO);
if (ret)
- return ret;
+ goto err;
if (m->u.kernel.match->checkentry
&& !m->u.kernel.match->checkentry(name, ip, match, dm->data,
- dm->u.match_size - sizeof(*dm),
hookmask)) {
duprintf("ip_tables: check failed for `%s'.\n",
m->u.kernel.match->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
+ (*i)++;
return 0;
+
+err:
+ module_put(m->u.kernel.match->me);
+ return ret;
}
static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
struct ipt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h;
+ int ret, h, j;
ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
+ j = 0;
*dstptr += sizeof(struct compat_ipt_entry);
ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
- name, &de->ip, de->comefrom);
+ name, &de->ip, de->comefrom, &j);
if (ret)
- goto out;
+ goto cleanup_matches;
de->target_offset = e->target_offset - (origsize - *size);
t = ipt_get_target(e);
target = t->u.kernel.target;
- if (target->compat)
- target->compat(t, dstptr, size, COMPAT_FROM_USER);
- else
- xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
+ xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
name, e->comefrom, e->ip.proto,
e->ip.invflags & IPT_INV_PROTO);
if (ret)
- goto out;
+ goto err;
ret = -EINVAL;
if (t->u.kernel.target == &ipt_standard_target) {
if (!standard_check(t, *size))
- goto out;
+ goto err;
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, de, target,
- t->data, t->u.target_size - sizeof(*t),
- de->comefrom)) {
+ t->data, de->comefrom)) {
duprintf("ip_tables: compat: check failed for `%s'.\n",
t->u.kernel.target->name);
- goto out;
+ goto err;
}
ret = 0;
-out:
+ return ret;
+
+err:
+ module_put(t->u.kernel.target->me);
+cleanup_matches:
+ IPT_MATCH_ITERATE(e, cleanup_match, &j);
return ret;
}
@@ -1761,7 +1697,7 @@ translate_compat_table(const char *name,
goto free_newinfo;
/* And one copy for every other CPU */
- for_each_cpu(i)
+ for_each_possible_cpu(i)
if (newinfo->entries[i] && newinfo->entries[i] != entry1)
memcpy(newinfo->entries[i], entry1, newinfo->size);
@@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
return ret;
}
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
static int
compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
@@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = compat_get_entries(user, len);
break;
default:
- duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
- ret = -EINVAL;
+ ret = do_ipt_get_ctl(sk, cmd, user, len);
}
return ret;
}
@@ -2113,7 +2050,8 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
return ret;
}
- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+ ret = xt_register_table(table, &bootstrap, newinfo);
+ if (ret != 0) {
xt_free_table_info(newinfo);
return ret;
}
@@ -2184,7 +2122,6 @@ icmp_checkentry(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2199,7 +2136,9 @@ static struct ipt_target ipt_standard_target = {
.targetsize = sizeof(int),
.family = AF_INET,
#ifdef CONFIG_COMPAT
- .compat = &compat_ipt_standard_fn,
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
#endif
};
@@ -2239,22 +2178,39 @@ static int __init ip_tables_init(void)
{
int ret;
- xt_proto_init(AF_INET);
+ ret = xt_proto_init(AF_INET);
+ if (ret < 0)
+ goto err1;
/* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(&ipt_standard_target);
- xt_register_target(&ipt_error_target);
- xt_register_match(&icmp_matchstruct);
+ ret = xt_register_target(&ipt_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&ipt_error_target);
+ if (ret < 0)
+ goto err3;
+ ret = xt_register_match(&icmp_matchstruct);
+ if (ret < 0)
+ goto err4;
/* Register setsockopt */
ret = nf_register_sockopt(&ipt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err5;
printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
return 0;
+
+err5:
+ xt_unregister_match(&icmp_matchstruct);
+err4:
+ xt_unregister_target(&ipt_error_target);
+err3:
+ xt_unregister_target(&ipt_standard_target);
+err2:
+ xt_proto_fini(AF_INET);
+err1:
+ return ret;
}
static void __exit ip_tables_fini(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index dbc83c5d7aa6..7a29d6e7baa7 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -10,7 +10,6 @@
*
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/proc_fs.h>
#include <linux/jhash.h>
#include <linux/bitops.h>
@@ -53,7 +52,7 @@ struct clusterip_config {
atomic_t entries; /* number of entries/rules
* referencing us */
- u_int32_t clusterip; /* the IP address */
+ __be32 clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
struct net_device *dev; /* device */
u_int16_t num_total_nodes; /* total number of nodes */
@@ -120,7 +119,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
}
static struct clusterip_config *
-__clusterip_config_find(u_int32_t clusterip)
+__clusterip_config_find(__be32 clusterip)
{
struct list_head *pos;
@@ -137,7 +136,7 @@ __clusterip_config_find(u_int32_t clusterip)
}
static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip, int entry)
+clusterip_config_find_get(__be32 clusterip, int entry)
{
struct clusterip_config *c;
@@ -167,17 +166,16 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
}
static struct clusterip_config *
-clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
+clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
char buffer[16];
- c = kmalloc(sizeof(*c), GFP_ATOMIC);
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return NULL;
- memset(c, 0, sizeof(*c));
c->dev = dev;
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
@@ -304,8 +302,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
enum ip_conntrack_info ctinfo;
@@ -375,7 +372,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -391,7 +387,7 @@ checkentry(const char *tablename,
return 0;
}
- if (e->ip.dmsk.s_addr != 0xffffffff
+ if (e->ip.dmsk.s_addr != htonl(0xffffffff)
|| e->ip.dst.s_addr == 0) {
printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
return 0;
@@ -452,8 +448,7 @@ checkentry(const char *tablename,
}
/* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo,
- unsigned int targinfosize)
+static void destroy(const struct xt_target *target, void *targinfo)
{
struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -481,9 +476,9 @@ static struct ipt_target clusterip_tgt = {
/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
struct arp_payload {
u_int8_t src_hw[ETH_ALEN];
- u_int32_t src_ip;
+ __be32 src_ip;
u_int8_t dst_hw[ETH_ALEN];
- u_int32_t dst_ip;
+ __be32 dst_ip;
} __attribute__ ((packed));
#ifdef CLUSTERIP_DEBUG
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e971288dfe..000000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* iptables module for setting the IPv4 DSCP field, Version 1.8
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
-*/
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_DSCP.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_DSCP_info *dinfo = targinfo;
- u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-
- if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
- u_int16_t diffs[2];
-
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
- return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
- | sh_dscp;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^ 0xFFFF));
- }
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const void *e_void,
- const struct xt_target *target,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
-
- if ((dscp > IPT_DSCP_MAX)) {
- printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
- return 0;
- }
- return 1;
-}
-
-static struct ipt_target ipt_dscp_reg = {
- .name = "DSCP",
- .target = target,
- .targetsize = sizeof(struct ipt_DSCP_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
- return ipt_register_target(&ipt_dscp_reg);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
- ipt_unregister_target(&ipt_dscp_reg);
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9d34f5..12a818a2462f 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module");
static inline int
set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
- if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
- != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
- u_int16_t diffs[2];
+ struct iphdr *iph = (*pskb)->nh.iph;
+ __be16 oldtos;
+ if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return 0;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
- (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
+ iph = (*pskb)->nh.iph;
+ oldtos = iph->tos;
+ iph->tos &= ~IPT_ECN_IP_MASK;
+ iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+ iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos,
+ iph->check);
}
return 1;
}
/* Return 0 if there was an error. */
static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
struct tcphdr _tcph, *tcph;
- u_int16_t diffs[2];
+ __be16 oldval;
/* Not enought header? */
tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
return 0;
tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, inward))
- return 0;
-
- diffs[0] = ((u_int16_t *)tcph)[6];
+ oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
tcph->ece = einfo->proto.tcp.ece;
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- diffs[1] = ((u_int16_t *)tcph)[6];
- diffs[0] = diffs[0] ^ 0xFFFF;
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
- tcph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- tcph->check^0xFFFF));
+ tcph->check = nf_proto_csum_update((*pskb),
+ oldval ^ htons(0xFFFF),
+ ((__be16 *)tcph)[6],
+ tcph->check, 0);
return 1;
}
@@ -95,8 +85,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_ECN_info *einfo = targinfo;
@@ -106,7 +95,7 @@ target(struct sk_buff **pskb,
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
&& (*pskb)->nh.iph->protocol == IPPROTO_TCP)
- if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+ if (!set_ect_tcp(pskb, einfo))
return NF_DROP;
return IPT_CONTINUE;
@@ -117,7 +106,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b08b084..7dc820df8bc5 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_log_info *loginfo = targinfo;
struct nf_loginfo li;
@@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 8b3e7f99b861..3dbfcfac8a84 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -9,7 +9,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -43,7 +42,6 @@ masquerade_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -65,15 +63,14 @@ masquerade_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
const struct ip_nat_multi_range_compat *mr;
struct ip_nat_range newrange;
struct rtable *rt;
- u_int32_t newsrc;
+ __be32 newsrc;
IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 2fcf1075b027..58a88f227108 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -10,7 +10,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/netdevice.h>
@@ -34,7 +33,6 @@ check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -56,12 +54,11 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t new_ip, netmask;
+ __be32 new_ip, netmask;
const struct ip_nat_multi_range_compat *mr = targinfo;
struct ip_nat_range newrange;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463232de..c0dcfe9d610c 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@ redirect_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -58,12 +57,11 @@ redirect_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t newdst;
+ __be32 newdst;
const struct ip_nat_multi_range_compat *mr = targinfo;
struct ip_nat_range newrange;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 431a3ce6f7b7..fd0c05efed8a 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -12,7 +12,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
@@ -91,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
fl.proto = IPPROTO_TCP;
fl.fl_ip_sport = tcph->dest;
fl.fl_ip_dport = tcph->source;
+ security_skb_classify_flow(skb, &fl);
xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
@@ -104,8 +104,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
struct iphdr *iph = oldskb->nh.iph;
struct tcphdr _otcph, *oth, *tcph;
struct rtable *rt;
- u_int16_t tmp_port;
- u_int32_t tmp_addr;
+ __be16 tmp_port;
+ __be32 tmp_addr;
int needs_ack;
int hh_len;
@@ -185,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->urg_ptr = 0;
/* Adjust TCP checksum */
+ nskb->ip_summed = CHECKSUM_NONE;
tcph->check = 0;
tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
nskb->nh.iph->saddr,
@@ -227,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_reject_info *reject = targinfo;
@@ -276,7 +276,6 @@ static int check(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_reject_info *rejinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09b5a67..b38b13328d73 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@ same_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@ same_check(const char *tablename,
}
static void
-same_destroy(const struct xt_target *target, void *targinfo,
- unsigned int targinfosize)
+same_destroy(const struct xt_target *target, void *targinfo)
{
struct ipt_same_info *mr = targinfo;
@@ -133,12 +131,12 @@ same_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t tmpip, aindex, new_ip;
+ u_int32_t tmpip, aindex;
+ __be32 new_ip;
const struct ipt_same_info *same = targinfo;
struct ip_nat_range newrange;
const struct ip_conntrack_tuple *t;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b3f0d8..108b6b76311f 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,26 +21,14 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables TCP MSS modification module");
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-
static inline unsigned int
optlen(const u_int8_t *opt, unsigned int offset)
{
/* Beware zero-length options: make finite progress */
- if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
- else return opt[offset+1];
+ if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+ return 1;
+ else
+ return opt[offset+1];
}
static unsigned int
@@ -49,26 +37,21 @@ ipt_tcpmss_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
struct tcphdr *tcph;
struct iphdr *iph;
- u_int16_t tcplen, newtotlen, oldval, newmss;
+ u_int16_t tcplen, newmss;
+ __be16 newtotlen, oldval;
unsigned int i;
u_int8_t *opt;
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, out == NULL))
- return NF_DROP;
-
iph = (*pskb)->nh.iph;
tcplen = (*pskb)->len - iph->ihl*4;
-
tcph = (void *)iph + iph->ihl*4;
/* Since it passed flags test in tcp match, we know it is is
@@ -84,54 +67,41 @@ ipt_tcpmss_target(struct sk_buff **pskb,
return NF_DROP;
}
- if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
- if(!(*pskb)->dst) {
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+ if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
+ sizeof(struct tcphdr)) {
if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+ printk(KERN_ERR "ipt_tcpmss_target: "
+ "unknown or invalid path-MTU (%d)\n",
+ dst_mtu((*pskb)->dst));
return NF_DROP; /* or IPT_CONTINUE ?? */
}
- if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
- return NF_DROP; /* or IPT_CONTINUE ?? */
- }
-
- newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+ newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
+ sizeof(struct tcphdr);
} else
newmss = tcpmssinfo->mss;
opt = (u_int8_t *)tcph;
- for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
- if ((opt[i] == TCPOPT_MSS) &&
- ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
- (opt[i+1] == TCPOLEN_MSS)) {
+ for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
+ if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
+ opt[i+1] == TCPOLEN_MSS) {
u_int16_t oldmss;
oldmss = (opt[i+2] << 8) | opt[i+3];
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- (oldmss <= newmss))
- return IPT_CONTINUE;
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+ oldmss <= newmss)
+ return IPT_CONTINUE;
opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = (newmss & 0x00ff);
- tcph->check = cheat_check(htons(oldmss)^0xFFFF,
- htons(newmss),
- tcph->check);
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu changed TCP MSS option"
- " (from %u to %u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- oldmss, newmss);
- goto retmodified;
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(oldmss)^htons(0xFFFF),
+ htons(newmss),
+ tcph->check, 0);
+ return IPT_CONTINUE;
}
}
@@ -143,13 +113,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
TCPOLEN_MSS, GFP_ATOMIC);
- if (!newskb) {
- if (net_ratelimit())
- printk(KERN_ERR "ipt_tcpmss_target:"
- " unable to allocate larger skb\n");
+ if (!newskb)
return NF_DROP;
- }
-
kfree_skb(*pskb);
*pskb = newskb;
iph = (*pskb)->nh.iph;
@@ -161,36 +126,29 @@ ipt_tcpmss_target(struct sk_buff **pskb,
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
- tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
- htons(tcplen + TCPOLEN_MSS), tcph->check);
- tcplen += TCPOLEN_MSS;
-
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(tcplen) ^ htons(0xFFFF),
+ htons(tcplen + TCPOLEN_MSS),
+ tcph->check, 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = (newmss & 0x00ff);
- tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+ tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt),
+ tcph->check, 0);
- oldval = ((u_int16_t *)tcph)[6];
+ oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
- tcph->check = cheat_check(oldval ^ 0xFFFF,
- ((u_int16_t *)tcph)[6], tcph->check);
+ tcph->check = nf_proto_csum_update(*pskb,
+ oldval ^ htons(0xFFFF),
+ ((__be16 *)tcph)[6],
+ tcph->check, 0);
newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
- iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
- newtotlen, iph->check);
+ iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF),
+ newtotlen, iph->check);
iph->tot_len = newtotlen;
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- newmss);
-
- retmodified:
return IPT_CONTINUE;
}
@@ -200,9 +158,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
{
const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
- if (strcmp(m->u.kernel.match->name, "tcp") == 0
- && (tcpinfo->flg_cmp & TH_SYN)
- && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+ if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+ tcpinfo->flg_cmp & TH_SYN &&
+ !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
return 1;
return 0;
@@ -214,17 +172,17 @@ ipt_tcpmss_checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
const struct ipt_entry *e = e_void;
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- ((hook_mask & ~((1 << NF_IP_FORWARD)
- | (1 << NF_IP_LOCAL_OUT)
- | (1 << NF_IP_POST_ROUTING))) != 0)) {
- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+ (hook_mask & ~((1 << NF_IP_FORWARD) |
+ (1 << NF_IP_LOCAL_OUT) |
+ (1 << NF_IP_POST_ROUTING))) != 0) {
+ printk("TCPMSS: path-MTU clamping only supported in "
+ "FORWARD, OUTPUT and POSTROUTING hooks\n");
return 0;
}
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca399b3..6b8b14ccc3d3 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,27 +26,20 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
+ struct iphdr *iph = (*pskb)->nh.iph;
+ __be16 oldtos;
- if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
- u_int16_t diffs[2];
-
+ if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos
- = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
- | tosinfo->tos;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
+ iph = (*pskb)->nh.iph;
+ oldtos = iph->tos;
+ iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
+ iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos,
+ iph->check);
}
return IPT_CONTINUE;
}
@@ -56,7 +49,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892ae0be5..ac9517d62af0 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,11 +23,10 @@ static unsigned int
ipt_ttl_target(struct sk_buff **pskb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct iphdr *iph;
const struct ipt_TTL_info *info = targinfo;
- u_int16_t diffs[2];
int new_ttl;
if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb,
}
if (new_ttl != iph->ttl) {
- diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+ iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF),
+ htons(new_ttl << 8),
+ iph->check);
iph->ttl = new_ttl;
- diffs[1] = htons(((unsigned)iph->ttl) << 8);
- iph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- iph->check^0xFFFF));
}
return IPT_CONTINUE;
@@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c84cc03389d8..2b104ea54f48 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -47,7 +47,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
@@ -116,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum)
del_timer(&ub->timer);
}
+ if (!ub->skb) {
+ DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+ return;
+ }
+
/* last nlmsg needs NLMSG_DONE */
if (ub->qlen > 1)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
@@ -304,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
@@ -342,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hookmask)
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 893dae210b04..7b60eb74788b 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -22,7 +22,7 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("iptables addrtype match");
-static inline int match_type(u_int32_t addr, u_int16_t mask)
+static inline int match_type(__be32 addr, u_int16_t mask)
{
return !!(mask & (1 << inet_addr_type(addr)));
}
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135873d7..1798f86bc534 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@ checkentry(const char *tablename,
const void *ip_void,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 47177591aeb6..000000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* IP tables module for matching the value of the IPv4 DSCP field
- *
- * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_dscp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- const struct xt_match *match, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_dscp_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
-
- u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
- return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
-}
-
-static struct ipt_match dscp_match = {
- .name = "dscp",
- .match = match,
- .matchsize = sizeof(struct ipt_dscp_info),
- .me = THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
- return ipt_register_match(&dscp_match);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
- ipt_unregister_match(&dscp_match);
-
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b28250414933..dafbdec0efc0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb,
static int checkentry(const char *tablename, const void *ip_void,
const struct xt_match *match,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
+ void *matchinfo, unsigned int hook_mask)
{
const struct ipt_ecn_info *info = matchinfo;
const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 92980ab8ce48..33ccdbf8e794 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -50,11 +50,11 @@ static struct file_operations dl_file_ops;
/* hash table crap */
struct dsthash_dst {
- u_int32_t src_ip;
- u_int32_t dst_ip;
+ __be32 src_ip;
+ __be32 dst_ip;
/* ports have to be consecutive !!! */
- u_int16_t src_port;
- u_int16_t dst_port;
+ __be16 src_port;
+ __be16 dst_port;
};
struct dsthash_ent {
@@ -106,8 +106,10 @@ static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
static inline u_int32_t
hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
{
- return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port),
- dst->src_ip, ht->rnd) % ht->cfg.size);
+ return (jhash_3words((__force u32)dst->dst_ip,
+ ((__force u32)dst->dst_port<<16 |
+ (__force u32)dst->src_port),
+ (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size);
}
static inline struct dsthash_ent *
@@ -406,7 +408,7 @@ hashlimit_match(const struct sk_buff *skb,
dst.src_ip = skb->nh.iph->saddr;
if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT
||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) {
- u_int16_t _ports[2], *ports;
+ __be16 _ports[2], *ports;
switch (skb->nh.iph->protocol) {
case IPPROTO_TCP:
@@ -454,15 +456,12 @@ hashlimit_match(const struct sk_buff *skb,
dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
hinfo->cfg.burst);
dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
- spin_unlock_bh(&hinfo->lock);
- return 1;
+ } else {
+ /* update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now);
}
- /* update expiration timeout */
- dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
- rateinfo_recalc(dh, now);
if (dh->rateinfo.credit >= dh->rateinfo.cost) {
/* We're underlimit. */
dh->rateinfo.credit -= dh->rateinfo.cost;
@@ -481,7 +480,6 @@ hashlimit_checkentry(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct ipt_hashlimit_info *r = matchinfo;
@@ -508,6 +506,9 @@ hashlimit_checkentry(const char *tablename,
if (!r->cfg.expire)
return 0;
+ if (r->name[sizeof(r->name) - 1] != '\0')
+ return 0;
+
/* This is the best we've got: We cannot release and re-grab lock,
* since checkentry() is called before ip_tables.c grabs ipt_mutex.
* We also cannot grab the hashtable spinlock, since htable_create will
@@ -529,18 +530,46 @@ hashlimit_checkentry(const char *tablename,
}
static void
-hashlimit_destroy(const struct xt_match *match, void *matchinfo,
- unsigned int matchsize)
+hashlimit_destroy(const struct xt_match *match, void *matchinfo)
{
struct ipt_hashlimit_info *r = matchinfo;
htable_put(r->hinfo);
}
+#ifdef CONFIG_COMPAT
+struct compat_ipt_hashlimit_info {
+ char name[IFNAMSIZ];
+ struct hashlimit_cfg cfg;
+ compat_uptr_t hinfo;
+ compat_uptr_t master;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+ int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+ memcpy(dst, src, off);
+ memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+ return copy_to_user(dst, src, off) ? -EFAULT : 0;
+}
+#endif
+
static struct ipt_match ipt_hashlimit = {
.name = "hashlimit",
.match = hashlimit_match,
.matchsize = sizeof(struct ipt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_hashlimit_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
.checkentry = hashlimit_checkentry,
.destroy = hashlimit_destroy,
.me = THIS_MODULE
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac023b5e..78c336f12a9e 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139f9cfd..126db44e71a8 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,20 +35,25 @@ static unsigned int ip_list_tot = 100;
static unsigned int ip_pkt_list_tot = 20;
static unsigned int ip_list_hash_size = 0;
static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
module_param(ip_list_tot, uint, 0400);
module_param(ip_pkt_list_tot, uint, 0400);
module_param(ip_list_hash_size, uint, 0400);
module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
-
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
struct recent_entry {
struct list_head list;
struct list_head lru_list;
- u_int32_t addr;
+ __be32 addr;
u_int8_t ttl;
u_int8_t index;
u_int16_t nstamps;
@@ -79,17 +84,17 @@ static struct file_operations recent_fops;
static u_int32_t hash_rnd;
static int hash_rnd_initted;
-static unsigned int recent_entry_hash(u_int32_t addr)
+static unsigned int recent_entry_hash(__be32 addr)
{
if (!hash_rnd_initted) {
get_random_bytes(&hash_rnd, 4);
hash_rnd_initted = 1;
}
- return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1);
+ return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1);
}
static struct recent_entry *
-recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl)
+recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl)
{
struct recent_entry *e;
unsigned int h;
@@ -110,7 +115,7 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
}
static struct recent_entry *
-recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl)
+recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
{
struct recent_entry *e;
@@ -172,7 +177,7 @@ ipt_recent_match(const struct sk_buff *skb,
const struct ipt_recent_info *info = matchinfo;
struct recent_table *t;
struct recent_entry *e;
- u_int32_t addr;
+ __be32 addr;
u_int8_t ttl;
int ret = info->invert;
@@ -232,7 +237,7 @@ out:
static int
ipt_recent_checkentry(const char *tablename, const void *ip,
const struct xt_match *match, void *matchinfo,
- unsigned int matchsize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
const struct ipt_recent_info *info = matchinfo;
struct recent_table *t;
@@ -274,6 +279,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
goto out;
}
t->proc->proc_fops = &recent_fops;
+ t->proc->uid = ip_list_uid;
+ t->proc->gid = ip_list_gid;
t->proc->data = t;
#endif
spin_lock_bh(&recent_lock);
@@ -286,8 +293,7 @@ out:
}
static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo,
- unsigned int matchsize)
+ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
{
const struct ipt_recent_info *info = matchinfo;
struct recent_table *t;
@@ -399,7 +405,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
struct recent_table *t = pde->data;
struct recent_entry *e;
char buf[sizeof("+255.255.255.255")], *c = buf;
- u_int32_t addr;
+ __be32 addr;
int add;
if (size > sizeof(buf))
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f417484bfbf..e2e7dd8d7903 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_filter);
}
static unsigned int
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook,
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 397b95cc026b..e62ea2bb9c0a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -10,7 +10,6 @@
*
* Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netdevice.h>
@@ -120,7 +119,7 @@ ipt_route_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_mangler);
}
static unsigned int
@@ -132,7 +131,7 @@ ipt_local_hook(unsigned int hook,
{
unsigned int ret;
u_int8_t tos;
- u_int32_t saddr, daddr;
+ __be32 saddr, daddr;
unsigned long nfmark;
/* root is playing with raw sockets. */
@@ -149,7 +148,7 @@ ipt_local_hook(unsigned int hook,
daddr = (*pskb)->nh.iph->daddr;
tos = (*pskb)->nh.iph->tos;
- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
&& ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce1e1b8..bcbeb4aeacd9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_raw);
}
/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8cc8e1b36778..0af803df82b0 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -14,7 +14,6 @@
* Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73ee3f2f..790f00d500c3 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
-unsigned long nf_ct_icmp_timeout = 30*HZ;
+unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9d394d..9c6cbe3d9fb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 291831e792af..05f5114828ea 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -32,7 +32,6 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
-#include <linux/config.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bd221ec3f81e..b430cf2a4f66 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
-#include <linux/config.h>
+
#include <linux/types.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -382,8 +381,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
int free = 0;
- u32 daddr;
- u32 saddr;
+ __be32 daddr;
+ __be32 saddr;
u8 tos;
int err;
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (!inet->hdrincl)
raw_probe_proto_opt(&fl, msg);
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
}
if (err)
@@ -609,6 +609,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sin) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cc9423de7311..c41ddba02e9d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -64,7 +64,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -105,6 +104,7 @@
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/ip_mp_alg.h>
+#include <net/netevent.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -206,21 +206,27 @@ __u8 ip_tos2prio[16] = {
struct rt_hash_bucket {
struct rtable *chain;
};
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
+ defined(CONFIG_PROVE_LOCKING)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
* The size of this table is a power of two and depends on the number of CPUS.
+ * (on lockdep we have a quite big spinlock_t, so keep the size down there)
*/
-#if NR_CPUS >= 32
-#define RT_HASH_LOCK_SZ 4096
-#elif NR_CPUS >= 16
-#define RT_HASH_LOCK_SZ 2048
-#elif NR_CPUS >= 8
-#define RT_HASH_LOCK_SZ 1024
-#elif NR_CPUS >= 4
-#define RT_HASH_LOCK_SZ 512
+#ifdef CONFIG_LOCKDEP
+# define RT_HASH_LOCK_SZ 256
#else
-#define RT_HASH_LOCK_SZ 256
+# if NR_CPUS >= 32
+# define RT_HASH_LOCK_SZ 4096
+# elif NR_CPUS >= 16
+# define RT_HASH_LOCK_SZ 2048
+# elif NR_CPUS >= 8
+# define RT_HASH_LOCK_SZ 1024
+# elif NR_CPUS >= 4
+# define RT_HASH_LOCK_SZ 512
+# else
+# define RT_HASH_LOCK_SZ 256
+# endif
#endif
static spinlock_t *rt_hash_locks;
@@ -244,7 +250,7 @@ static unsigned int rt_hash_rnd;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
- (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
+ (__raw_get_cpu_var(rt_cache_stat).field++)
static int rt_intern_hash(unsigned hash, struct rtable *rth,
struct rtable **res);
@@ -255,6 +261,10 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
& rt_hash_mask);
}
+#define rt_hash(daddr, saddr, idx) \
+ rt_hash_code((__force u32)(__be32)(daddr),\
+ (__force u32)(__be32)(saddr) ^ ((idx) << 5))
+
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
int bucket;
@@ -1068,7 +1078,7 @@ static void ip_select_fb_ident(struct iphdr *iph)
u32 salt;
spin_lock_bh(&ip_fb_id_lock);
- salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
+ salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
iph->id = htons(salt & 0xFFFF);
ip_fallback_id = salt;
spin_unlock_bh(&ip_fb_id_lock);
@@ -1112,14 +1122,15 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
-void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
- u32 saddr, struct net_device *dev)
+void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
+ __be32 saddr, struct net_device *dev)
{
int i, k;
struct in_device *in_dev = in_dev_get(dev);
struct rtable *rth, **rthp;
- u32 skeys[2] = { saddr, 0 };
+ __be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
+ struct netevent_redirect netevent;
if (!in_dev)
return;
@@ -1140,8 +1151,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
for (i = 0; i < 2; i++) {
for (k = 0; k < 2; k++) {
- unsigned hash = rt_hash_code(daddr,
- skeys[i] ^ (ikeys[k] << 5));
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
rthp=&rt_hash_table[hash].chain;
@@ -1211,6 +1221,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
rt_drop(rt);
goto do_next;
}
+
+ netevent.old = &rth->u.dst;
+ netevent.new = &rt->u.dst;
+ call_netevent_notifiers(NETEVENT_REDIRECT,
+ &netevent);
rt_del(hash, rth);
if (!rt_intern_hash(hash, rt, &rt))
@@ -1248,9 +1263,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
ret = NULL;
} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
rt->u.dst.expires) {
- unsigned hash = rt_hash_code(rt->fl.fl4_dst,
- rt->fl.fl4_src ^
- (rt->fl.oif << 5));
+ unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+ rt->fl.oif);
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ip_rt_advice: redirect to "
"%u.%u.%u.%u/%02x dropped\n",
@@ -1385,15 +1399,15 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
int i;
unsigned short old_mtu = ntohs(iph->tot_len);
struct rtable *rth;
- u32 skeys[2] = { iph->saddr, 0, };
- u32 daddr = iph->daddr;
+ __be32 skeys[2] = { iph->saddr, 0, };
+ __be32 daddr = iph->daddr;
unsigned short est_mtu = 0;
if (ipv4_config.no_pmtu_disc)
return 0;
for (i = 0; i < 2; i++) {
- unsigned hash = rt_hash_code(daddr, skeys[i]);
+ unsigned hash = rt_hash(daddr, skeys[i], 0);
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1447,6 +1461,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
}
dst->metrics[RTAX_MTU-1] = mtu;
dst_set_expires(dst, ip_rt_mtu_expires);
+ call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
@@ -1517,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb)
void ip_rt_get_source(u8 *addr, struct rtable *rt)
{
- u32 src;
+ __be32 src;
struct fib_result res;
if (rt->fl.iif == 0)
@@ -1583,12 +1598,12 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
rt->rt_type = res->type;
}
-static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, int our)
{
unsigned hash;
struct rtable *rth;
- u32 spec_dst;
+ __be32 spec_dst;
struct in_device *in_dev = in_dev_get(dev);
u32 itag = 0;
@@ -1652,7 +1667,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
RT_CACHE_STAT_INC(in_slow_mc);
in_dev_put(in_dev);
- hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5));
+ hash = rt_hash(daddr, saddr, dev->ifindex);
return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
e_nobufs:
@@ -1668,8 +1683,8 @@ e_inval:
static void ip_handle_martian_source(struct net_device *dev,
struct in_device *in_dev,
struct sk_buff *skb,
- u32 daddr,
- u32 saddr)
+ __be32 daddr,
+ __be32 saddr)
{
RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
@@ -1699,7 +1714,7 @@ static void ip_handle_martian_source(struct net_device *dev,
static inline int __mkroute_input(struct sk_buff *skb,
struct fib_result* res,
struct in_device *in_dev,
- u32 daddr, u32 saddr, u32 tos,
+ __be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
@@ -1707,7 +1722,8 @@ static inline int __mkroute_input(struct sk_buff *skb,
int err;
struct in_device *out_dev;
unsigned flags = 0;
- u32 spec_dst, itag;
+ __be32 spec_dst;
+ u32 itag;
/* get a working reference to the output device */
out_dev = in_dev_get(FIB_RES_DEV(*res));
@@ -1800,7 +1816,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
struct fib_result* res,
const struct flowi *fl,
struct in_device *in_dev,
- u32 daddr, u32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos)
{
struct rtable* rth = NULL;
int err;
@@ -1817,7 +1833,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
return err;
/* put it into the cache */
- hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+ hash = rt_hash(daddr, saddr, fl->iif);
return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
}
@@ -1825,7 +1841,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
struct fib_result* res,
const struct flowi *fl,
struct in_device *in_dev,
- u32 daddr, u32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
struct rtable* rth = NULL, *rtres;
@@ -1858,7 +1874,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
return err;
/* put it into the cache */
- hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+ hash = rt_hash(daddr, saddr, fl->iif);
err = rt_intern_hash(hash, rth, &rtres);
if (err)
return err;
@@ -1888,7 +1904,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
* 2. IP spoofing attempts are filtered with 100% of guarantee.
*/
-static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
+static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
{
struct fib_result res;
@@ -1907,7 +1923,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
u32 itag = 0;
struct rtable * rth;
unsigned hash;
- u32 spec_dst;
+ __be32 spec_dst;
int err = -EINVAL;
int free_res = 0;
@@ -1923,7 +1939,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
goto martian_source;
- if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
+ if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
goto brd_input;
/* Accept zero addresses only to limited broadcast;
@@ -2035,7 +2051,7 @@ local_input:
rth->rt_flags &= ~RTCF_LOCAL;
}
rth->rt_type = res.type;
- hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5));
+ hash = rt_hash(daddr, saddr, fl.iif);
err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
goto done;
@@ -2074,7 +2090,7 @@ martian_source:
goto e_inval;
}
-int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
+int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
{
struct rtable * rth;
@@ -2082,7 +2098,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
int iif = dev->ifindex;
tos &= IPTOS_RT_MASK;
- hash = rt_hash_code(daddr, saddr ^ (iif << 5));
+ hash = rt_hash(daddr, saddr, iif);
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2156,7 +2172,7 @@ static inline int __mkroute_output(struct rtable **result,
if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
return -EINVAL;
- if (fl->fl4_dst == 0xFFFFFFFF)
+ if (fl->fl4_dst == htonl(0xFFFFFFFF))
res->type = RTN_BROADCAST;
else if (MULTICAST(fl->fl4_dst))
res->type = RTN_MULTICAST;
@@ -2280,8 +2296,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
unsigned hash;
if (err == 0) {
- hash = rt_hash_code(oldflp->fl4_dst,
- oldflp->fl4_src ^ (oldflp->oif << 5));
+ hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
err = rt_intern_hash(hash, rth, rp);
}
@@ -2323,9 +2338,8 @@ static inline int ip_mkroute_output(struct rtable** rp,
if (err != 0)
goto cleanup;
- hash = rt_hash_code(oldflp->fl4_dst,
- oldflp->fl4_src ^
- (oldflp->oif << 5));
+ hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
+ oldflp->oif);
err = rt_intern_hash(hash, rth, rp);
/* forward hop information to multipath impl. */
@@ -2404,7 +2418,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
*/
if (oldflp->oif == 0
- && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
+ && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2441,7 +2455,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
goto out; /* Wrong error code */
}
- if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
+ if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
if (!fl.fl4_src)
fl.fl4_src = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
@@ -2554,7 +2568,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
unsigned hash;
struct rtable *rth;
- hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5));
+ hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
rcu_read_lock_bh();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2626,51 +2640,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
{
struct rtable *rt = (struct rtable*)skb->dst;
struct rtmsg *r;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ struct nlmsghdr *nlh;
struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
- struct rtattr *eptr;
-#endif
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
- r = NLMSG_DATA(nlh);
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ r = nlmsg_data(nlh);
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
r->rtm_tos = rt->fl.fl4_tos;
r->rtm_table = RT_TABLE_MAIN;
+ NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
r->rtm_type = rt->rt_type;
r->rtm_scope = RT_SCOPE_UNIVERSE;
r->rtm_protocol = RTPROT_UNSPEC;
r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
if (rt->rt_flags & RTCF_NOTIFY)
r->rtm_flags |= RTM_F_NOTIFY;
- RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+ NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
+
if (rt->fl.fl4_src) {
r->rtm_src_len = 32;
- RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+ NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
}
if (rt->u.dst.dev)
- RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+ NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
#ifdef CONFIG_NET_CLS_ROUTE
if (rt->u.dst.tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+ NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
- __u32 alg = rt->rt_multipath_alg;
-
- RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
- }
+ if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+ NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
#endif
if (rt->fl.iif)
- RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+ NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
else if (rt->rt_src != rt->fl.fl4_src)
- RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+ NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
+
if (rt->rt_dst != rt->rt_gateway)
- RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+ NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
+
if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
+
ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
ci.rta_used = rt->u.dst.__use;
ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
@@ -2687,13 +2704,10 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
}
}
-#ifdef CONFIG_IP_MROUTE
- eptr = (struct rtattr*)skb->tail;
-#endif
- RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
if (rt->fl.iif) {
#ifdef CONFIG_IP_MROUTE
- u32 dst = rt->rt_dst;
+ __be32 dst = rt->rt_dst;
if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
ipv4_devconf.mc_forwarding) {
@@ -2702,41 +2716,48 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
if (!nowait) {
if (err == 0)
return 0;
- goto nlmsg_failure;
+ goto nla_put_failure;
} else {
if (err == -EMSGSIZE)
- goto nlmsg_failure;
- ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+ goto nla_put_failure;
+ ci.rta_error = err;
}
}
} else
#endif
- RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+ NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
}
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
struct rtable *rt = NULL;
- u32 dst = 0;
- u32 src = 0;
- int iif = 0;
- int err = -ENOBUFS;
+ __be32 dst = 0;
+ __be32 src = 0;
+ u32 iif;
+ int err;
struct sk_buff *skb;
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ rtm = nlmsg_data(nlh);
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- goto out;
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
/* Reserve room for dummy headers, this skb can pass
through good chunk of routing engine.
@@ -2747,62 +2768,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
skb->nh.iph->protocol = IPPROTO_ICMP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
- if (rta[RTA_SRC - 1])
- memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
- if (rta[RTA_DST - 1])
- memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
- if (rta[RTA_IIF - 1])
- memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+ src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
+ dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
+ iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
if (iif) {
- struct net_device *dev = __dev_get_by_index(iif);
- err = -ENODEV;
- if (!dev)
- goto out_free;
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(iif);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout_free;
+ }
+
skb->protocol = htons(ETH_P_IP);
skb->dev = dev;
local_bh_disable();
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
local_bh_enable();
- rt = (struct rtable*)skb->dst;
- if (!err && rt->u.dst.error)
+
+ rt = (struct rtable*) skb->dst;
+ if (err == 0 && rt->u.dst.error)
err = -rt->u.dst.error;
} else {
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
- .saddr = src,
- .tos = rtm->rtm_tos } } };
- int oif = 0;
- if (rta[RTA_OIF - 1])
- memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
- fl.oif = oif;
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dst,
+ .saddr = src,
+ .tos = rtm->rtm_tos,
+ },
+ },
+ .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+ };
err = ip_route_output_key(&rt, &fl);
}
+
if (err)
- goto out_free;
+ goto errout_free;
skb->dst = &rt->u.dst;
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
- if (!err)
- goto out_free;
- if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
- }
+ if (err <= 0)
+ goto errout_free;
- err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
- if (err > 0)
- err = 0;
-out: return err;
+ err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
+ return err;
-out_free:
+errout_free:
kfree_skb(skb);
- goto out;
+ goto errout;
}
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3130,13 +3150,9 @@ int __init ip_rt_init(void)
}
#endif
- ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
- sizeof(struct rtable),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
-
- if (!ipv4_dst_ops.kmem_cachep)
- panic("IP: failed to allocate ip_dst_cache\n");
+ ipv4_dst_ops.kmem_cachep =
+ kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
@@ -3144,7 +3160,7 @@ int __init ip_rt_init(void)
rhash_entries,
(num_physpages >= 128 * 1024) ?
15 : 17,
- HASH_HIGHMEM,
+ 0,
&rt_hash_log,
&rt_hash_mask,
0);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be3331f67..661e0a4bca72 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
if (!req)
goto out;
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+ goto out;
+ }
ireq = inet_rsk(req);
treq = tcp_rsk(req);
treq->rcv_isn = htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
.uli_u = { .ports =
{ .sport = skb->h.th->dest,
.dport = skb->h.th->source } } };
+ security_req_classify_flow(req, &fl);
if (ip_route_output_key(&rt, &fl)) {
reqsk_free(req);
goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ce4cd5f35511..e82a5be894b5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -10,7 +10,6 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
-#include <linux/config.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <net/snmp.h>
@@ -18,6 +17,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
+#include <net/cipso_ipv4.h>
/* From af_inet.c */
extern int sysctl_ip_nonlocal_bind;
@@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
return ret;
}
+static int __init tcp_congestion_default(void)
+{
+ return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+}
+
+late_initcall(tcp_congestion_default);
ctl_table ipv4_table[] = {
{
@@ -698,6 +704,40 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+#ifdef CONFIG_NETLABEL
+ {
+ .ctl_name = NET_CIPSOV4_CACHE_ENABLE,
+ .procname = "cipso_cache_enable",
+ .data = &cipso_v4_cache_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE,
+ .procname = "cipso_cache_bucket_size",
+ .data = &cipso_v4_cache_bucketsize,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_RBM_OPTFMT,
+ .procname = "cipso_rbm_optfmt",
+ .data = &cipso_v4_rbm_optfmt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_RBM_STRICTVALID,
+ .procname = "cipso_rbm_strictvalid",
+ .data = &cipso_v4_rbm_strictvalid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif /* CONFIG_NETLABEL */
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0e029c4e2903..66e9a729f6df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -247,7 +247,6 @@
* TCP_CLOSE socket is finished
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
@@ -269,7 +268,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
@@ -569,7 +568,7 @@ new_segment:
skb->truesize += copy;
sk->sk_wmem_queued += copy;
sk->sk_forward_alloc -= copy;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
skb_shinfo(skb)->gso_segs = 0;
@@ -643,7 +642,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp)
int tmp = tp->mss_cache;
if (sk->sk_route_caps & NETIF_F_SG) {
- if (sk->sk_route_caps & NETIF_F_TSO)
+ if (sk_can_gso(sk))
tmp = 0;
else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
@@ -724,7 +723,7 @@ new_segment:
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, tp, skb);
copy = size_goal;
@@ -956,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
* receive buffer and there was a small segment
* in queue.
*/
- (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
- !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+ (copied > 0 &&
+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+ !icsk->icsk_ack.pingpong)) &&
+ !atomic_read(&sk->sk_rmem_alloc)))
time_to_ack = 1;
}
@@ -1133,7 +1135,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
tp->ucopy.dma_chan = NULL;
preempt_disable();
if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
- !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+ !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
preempt_enable_no_resched();
tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
} else
@@ -1660,7 +1662,8 @@ adjudge_to_death:
const int tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
+ inet_csk_reset_keepalive_timer(sk,
+ tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto out;
@@ -2145,7 +2148,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
@@ -2166,15 +2169,36 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
if (!pskb_may_pull(skb, thlen))
goto out;
- oldlen = ~htonl(skb->len);
+ oldlen = (u16)~skb->len;
__skb_pull(skb, thlen);
- segs = skb_segment(skb, sg);
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+ int mss;
+
+ if (unlikely(type &
+ ~(SKB_GSO_TCPV4 |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0) ||
+ !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
+ goto out;
+
+ mss = skb_shinfo(skb)->gso_size;
+ skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
+
+ segs = NULL;
+ goto out;
+ }
+
+ segs = skb_segment(skb, features);
if (IS_ERR(segs))
goto out;
len = skb_shinfo(skb)->gso_size;
- delta = csum_add(oldlen, htonl(thlen + len));
+ delta = htonl(oldlen + (thlen + len));
skb = segs;
th = skb->h.th;
@@ -2183,10 +2207,10 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
do {
th->fin = th->psh = 0;
- if (skb->ip_summed == CHECKSUM_NONE) {
- th->check = csum_fold(csum_partial(
- skb->h.raw, thlen, csum_add(skb->csum, delta)));
- }
+ th->check = ~csum_fold(th->check + delta);
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+ skb->csum));
seq += len;
skb = skb->next;
@@ -2196,15 +2220,16 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
th->cwr = 0;
} while (skb->next);
- if (skb->ip_summed == CHECKSUM_NONE) {
- delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw));
- th->check = csum_fold(csum_partial(
- skb->h.raw, thlen, csum_add(skb->csum, delta)));
- }
+ delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+ th->check = ~csum_fold(th->check + delta);
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+ skb->csum));
out:
return segs;
}
+EXPORT_SYMBOL(tcp_tso_segment);
extern void __skb_cb_too_small_for_tcp(int, int);
extern struct tcp_congestion_ops tcp_reno;
@@ -2232,9 +2257,7 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if (!tcp_hashinfo.bind_bucket_cachep)
- panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b2d9021ad22b..5730333cd0ac 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -12,7 +12,6 @@
* this behaves the same as the original Reno.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -232,7 +231,7 @@ static struct tcp_congestion_ops bictcp = {
static int __init bictcp_register(void)
{
- BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&bictcp);
}
diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
deleted file mode 100644
index bc54f7e9aea9..000000000000
--- a/net/ipv4/tcp_compound.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * TCP Vegas congestion control
- *
- * This is based on the congestion detection/avoidance scheme described in
- * Lawrence S. Brakmo and Larry L. Peterson.
- * "TCP Vegas: End to end congestion avoidance on a global internet."
- * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
- * October 1995. Available from:
- * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
- *
- * See http://www.cs.arizona.edu/xkernel/ for their implementation.
- * The main aspects that distinguish this implementation from the
- * Arizona Vegas implementation are:
- * o We do not change the loss detection or recovery mechanisms of
- * Linux in any way. Linux already recovers from losses quite well,
- * using fine-grained timers, NewReno, and FACK.
- * o To avoid the performance penalty imposed by increasing cwnd
- * only every-other RTT during slow start, we increase during
- * every RTT during slow start, just like Reno.
- * o Largely to allow continuous cwnd growth during slow start,
- * we use the rate at which ACKs come back as the "actual"
- * rate, rather than the rate at which data is sent.
- * o To speed convergence to the right rate, we set the cwnd
- * to achieve the right ("actual") rate when we exit slow start.
- * o To filter out the noise caused by delayed ACKs, we use the
- * minimum RTT sample observed during the last RTT to calculate
- * the actual rate.
- * o When the sender re-starts from idle, it waits until it has
- * received ACKs for an entire flight of new data before making
- * a cwnd adjustment decision. The original Vegas implementation
- * assumed senders never went idle.
- *
- *
- * TCP Compound based on TCP Vegas
- *
- * further details can be found here:
- * ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/inet_diag.h>
-
-#include <net/tcp.h>
-
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-
-#define TCP_COMPOUND_ALPHA 3U
-#define TCP_COMPOUND_BETA 1U
-#define TCP_COMPOUND_GAMMA 30
-#define TCP_COMPOUND_ZETA 1
-
-/* TCP compound variables */
-struct compound {
- u32 beg_snd_nxt; /* right edge during last RTT */
- u32 beg_snd_una; /* left edge during last RTT */
- u32 beg_snd_cwnd; /* saves the size of the cwnd */
- u8 doing_vegas_now; /* if true, do vegas for this RTT */
- u16 cntRTT; /* # of RTTs measured within last RTT */
- u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
- u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
-
- u32 cwnd;
- u32 dwnd;
-};
-
-/* There are several situations when we must "re-start" Vegas:
- *
- * o when a connection is established
- * o after an RTO
- * o after fast recovery
- * o when we send a packet and there is no outstanding
- * unacknowledged data (restarting an idle connection)
- *
- * In these circumstances we cannot do a Vegas calculation at the
- * end of the first RTT, because any calculation we do is using
- * stale info -- both the saved cwnd and congestion feedback are
- * stale.
- *
- * Instead we must wait until the completion of an RTT during
- * which we actually receive ACKs.
- */
-static inline void vegas_enable(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct compound *vegas = inet_csk_ca(sk);
-
- /* Begin taking Vegas samples next time we send something. */
- vegas->doing_vegas_now = 1;
-
- /* Set the beginning of the next send window. */
- vegas->beg_snd_nxt = tp->snd_nxt;
-
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
-}
-
-/* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct sock *sk)
-{
- struct compound *vegas = inet_csk_ca(sk);
-
- vegas->doing_vegas_now = 0;
-}
-
-static void tcp_compound_init(struct sock *sk)
-{
- struct compound *vegas = inet_csk_ca(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
-
- vegas->baseRTT = 0x7fffffff;
- vegas_enable(sk);
-
- vegas->dwnd = 0;
- vegas->cwnd = tp->snd_cwnd;
-}
-
-/* Do RTT sampling needed for Vegas.
- * Basically we:
- * o min-filter RTT samples from within an RTT to get the current
- * propagation delay + queuing delay (we are min-filtering to try to
- * avoid the effects of delayed ACKs)
- * o min-filter RTT samples from a much longer window (forever for now)
- * to find the propagation delay (baseRTT)
- */
-static void tcp_compound_rtt_calc(struct sock *sk, u32 usrtt)
-{
- struct compound *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
-
- /* Filter to find propagation delay: */
- if (vrtt < vegas->baseRTT)
- vegas->baseRTT = vrtt;
-
- /* Find the min RTT during the last RTT to find
- * the current prop. delay + queuing delay:
- */
-
- vegas->minRTT = min(vegas->minRTT, vrtt);
- vegas->cntRTT++;
-}
-
-static void tcp_compound_state(struct sock *sk, u8 ca_state)
-{
-
- if (ca_state == TCP_CA_Open)
- vegas_enable(sk);
- else
- vegas_disable(sk);
-}
-
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u64 div64_64(u64 dividend, u64 divisor)
-{
- u32 d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (u32) dividend / d;
-
- return dividend;
-}
-
-/* calculate the quartic root of "a" using Newton-Raphson */
-static u32 qroot(u64 a)
-{
- u32 x, x1;
-
- /* Initial estimate is based on:
- * qrt(x) = exp(log(x) / 4)
- */
- x = 1u << (fls64(a) >> 2);
-
- /*
- * Iteration based on:
- * 3
- * x = ( 3 * x + a / x ) / 4
- * k+1 k k
- */
- do {
- u64 x3 = x;
-
- x1 = x;
- x3 *= x;
- x3 *= x;
-
- x = (3 * x + (u32) div64_64(a, x3)) / 4;
- } while (abs(x1 - x) > 1);
-
- return x;
-}
-
-
-/*
- * If the connection is idle and we are restarting,
- * then we don't want to do any Vegas calculations
- * until we get fresh RTT samples. So when we
- * restart, we reset our Vegas state to a clean
- * slate. After we get acks for this flight of
- * packets, _then_ we can make Vegas calculations
- * again.
- */
-static void tcp_compound_cwnd_event(struct sock *sk, enum tcp_ca_event event)
-{
- if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
- tcp_compound_init(sk);
-}
-
-static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct compound *vegas = inet_csk_ca(sk);
- u8 inc = 0;
-
- if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) {
- if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) {
- vegas->cwnd = tp->snd_cwnd;
- vegas->dwnd = 0;
- } else
- vegas->cwnd = tp->snd_cwnd - vegas->dwnd;
-
- }
-
- if (!tcp_is_cwnd_limited(sk, in_flight))
- return;
-
- if (vegas->cwnd <= tp->snd_ssthresh)
- inc = 1;
- else if (tp->snd_cwnd_cnt < tp->snd_cwnd)
- tp->snd_cwnd_cnt++;
-
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- inc = 1;
- tp->snd_cwnd_cnt = 0;
- }
-
- if (inc && tp->snd_cwnd < tp->snd_cwnd_clamp)
- vegas->cwnd++;
-
- /* The key players are v_beg_snd_una and v_beg_snd_nxt.
- *
- * These are so named because they represent the approximate values
- * of snd_una and snd_nxt at the beginning of the current RTT. More
- * precisely, they represent the amount of data sent during the RTT.
- * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
- * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
- * bytes of data have been ACKed during the course of the RTT, giving
- * an "actual" rate of:
- *
- * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
- *
- * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
- * because delayed ACKs can cover more than one segment, so they
- * don't line up nicely with the boundaries of RTTs.
- *
- * Another unfortunate fact of life is that delayed ACKs delay the
- * advance of the left edge of our send window, so that the number
- * of bytes we send in an RTT is often less than our cwnd will allow.
- * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
- */
-
- if (after(ack, vegas->beg_snd_nxt)) {
- /* Do the Vegas once-per-RTT cwnd adjustment. */
- u32 old_wnd, old_snd_cwnd;
-
- /* Here old_wnd is essentially the window of data that was
- * sent during the previous RTT, and has all
- * been acknowledged in the course of the RTT that ended
- * with the ACK we just received. Likewise, old_snd_cwnd
- * is the cwnd during the previous RTT.
- */
- if (!tp->mss_cache)
- return;
-
- old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
- tp->mss_cache;
- old_snd_cwnd = vegas->beg_snd_cwnd;
-
- /* Save the extent of the current window so we can use this
- * at the end of the next RTT.
- */
- vegas->beg_snd_una = vegas->beg_snd_nxt;
- vegas->beg_snd_nxt = tp->snd_nxt;
- vegas->beg_snd_cwnd = tp->snd_cwnd;
-
- /* We do the Vegas calculations only if we got enough RTT
- * samples that we can be reasonably sure that we got
- * at least one RTT sample that wasn't from a delayed ACK.
- * If we only had 2 samples total,
- * then that means we're getting only 1 ACK per RTT, which
- * means they're almost certainly delayed ACKs.
- * If we have 3 samples, we should be OK.
- */
-
- if (vegas->cntRTT > 2) {
- u32 rtt, target_cwnd, diff;
- u32 brtt, dwnd;
-
- /* We have enough RTT samples, so, using the Vegas
- * algorithm, we determine if we should increase or
- * decrease cwnd, and by how much.
- */
-
- /* Pluck out the RTT we are using for the Vegas
- * calculations. This is the min RTT seen during the
- * last RTT. Taking the min filters out the effects
- * of delayed ACKs, at the cost of noticing congestion
- * a bit later.
- */
- rtt = vegas->minRTT;
-
- /* Calculate the cwnd we should have, if we weren't
- * going too fast.
- *
- * This is:
- * (actual rate in segments) * baseRTT
- * We keep it as a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary point.
- */
- if (!rtt)
- return;
-
- brtt = vegas->baseRTT;
- target_cwnd = ((old_wnd * brtt)
- << V_PARAM_SHIFT) / rtt;
-
- /* Calculate the difference between the window we had,
- * and the window we would like to have. This quantity
- * is the "Diff" from the Arizona Vegas papers.
- *
- * Again, this is a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary
- * point.
- */
-
- diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
-
- dwnd = vegas->dwnd;
-
- if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) {
- u64 v;
- u32 x;
-
- /*
- * The TCP Compound paper describes the choice
- * of "k" determines the agressiveness,
- * ie. slope of the response function.
- *
- * For same value as HSTCP would be 0.8
- * but for computaional reasons, both the
- * original authors and this implementation
- * use 0.75.
- */
- v = old_wnd;
- x = qroot(v * v * v) >> TCP_COMPOUND_ALPHA;
- if (x > 1)
- dwnd = x - 1;
- else
- dwnd = 0;
-
- dwnd += vegas->dwnd;
-
- } else if ((dwnd << V_PARAM_SHIFT) <
- (diff * TCP_COMPOUND_BETA))
- dwnd = 0;
- else
- dwnd =
- ((dwnd << V_PARAM_SHIFT) -
- (diff *
- TCP_COMPOUND_BETA)) >> V_PARAM_SHIFT;
-
- vegas->dwnd = dwnd;
-
- }
-
- /* Wipe the slate clean for the next RTT. */
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
- }
-
- tp->snd_cwnd = vegas->cwnd + vegas->dwnd;
-}
-
-/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_compound_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
-{
- const struct compound *ca = inet_csk_ca(sk);
- if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info *info;
-
- info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
- sizeof(*info)));
-
- info->tcpv_enabled = ca->doing_vegas_now;
- info->tcpv_rttcnt = ca->cntRTT;
- info->tcpv_rtt = ca->baseRTT;
- info->tcpv_minrtt = ca->minRTT;
- rtattr_failure:;
- }
-}
-
-static struct tcp_congestion_ops tcp_compound = {
- .init = tcp_compound_init,
- .ssthresh = tcp_reno_ssthresh,
- .cong_avoid = tcp_compound_cong_avoid,
- .rtt_sample = tcp_compound_rtt_calc,
- .set_state = tcp_compound_state,
- .cwnd_event = tcp_compound_cwnd_event,
- .get_info = tcp_compound_get_info,
-
- .owner = THIS_MODULE,
- .name = "compound",
-};
-
-static int __init tcp_compound_register(void)
-{
- BUG_ON(sizeof(struct compound) > ICSK_CA_PRIV_SIZE);
- tcp_register_congestion_control(&tcp_compound);
- return 0;
-}
-
-static void __exit tcp_compound_unregister(void)
-{
- tcp_unregister_congestion_control(&tcp_compound);
-}
-
-module_init(tcp_compound_register);
-module_exit(tcp_compound_unregister);
-
-MODULE_AUTHOR("Angelo P. Castellani, Stephen Hemminger");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("TCP Compound");
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 857eefc52aab..af0aca1e6be6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -6,7 +6,6 @@
* Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
@@ -49,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
ret = -EEXIST;
} else {
- list_add_rcu(&ca->list, &tcp_cong_list);
+ list_add_tail_rcu(&ca->list, &tcp_cong_list);
printk(KERN_INFO "TCP %s registered\n", ca->name);
}
spin_unlock(&tcp_cong_list_lock);
@@ -190,7 +189,7 @@ void tcp_slow_start(struct tcp_sock *tp)
return;
/* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 78b7a6b9e4de..a60ef38d75c6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -12,7 +12,6 @@
* this behaves the same as the original Reno.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -359,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = {
static int __init cubictcp_register(void)
{
- BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
/* Precompute a bunch of the scaling factors that are used per-packet
* based on SRTT of 100ms
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index c148c1081880..57c5f0b10e6c 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -11,7 +11,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/inet_diag.h>
@@ -26,7 +25,10 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_info *info = _info;
- r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq;
+ if (sk->sk_state == TCP_LISTEN)
+ r->idiag_rqueue = sk->sk_ack_backlog;
+ else
+ r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq;
r->idiag_wqueue = tp->write_seq - tp->snd_una;
if (info != NULL)
tcp_get_info(sk, info);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 1120245b2373..c4fc811bf377 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -6,7 +6,6 @@
* John Heffner <jheffner@psc.edu>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -140,14 +139,19 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
tp->snd_cwnd++;
}
} else {
- /* Update AIMD parameters */
+ /* Update AIMD parameters.
+ *
+ * We want to guarantee that:
+ * hstcp_aimd_vals[ca->ai-1].cwnd <
+ * snd_cwnd <=
+ * hstcp_aimd_vals[ca->ai].cwnd
+ */
if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
- ca->ai > 0)
+ } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
@@ -185,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
static int __init hstcp_register(void)
{
- BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_highspeed);
}
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 3d92c1859267..682e7d5b6f2f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -6,7 +6,6 @@
* http://www.hamilton.ie/net/htcp3.pdf
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -287,7 +286,7 @@ static struct tcp_congestion_ops htcp = {
static int __init htcp_register(void)
{
- BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
return tcp_register_congestion_control(&htcp);
}
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 40dbb3877510..59e691d26f64 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -10,7 +10,6 @@
* root at danielinux.net
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -171,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = {
static int __init hybla_register(void)
{
- BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_hybla);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94fe5b1f9dcb..3f884cea14ff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -63,7 +63,6 @@
* Pasi Sarolahti: F-RTO for dealing with spurious RTOs
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
@@ -73,24 +72,24 @@
#include <asm/unaligned.h>
#include <net/netdma.h>
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
-
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
-
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc = 1;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
+
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
+
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -128,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
/* skb->len may jitter because of SACKs, even if peer
* sends good full-sized frames.
*/
- len = skb->len;
+ len = skb_shinfo(skb)->gso_size ?: skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
icsk->icsk_ack.rcv_mss = len;
} else {
@@ -157,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
return;
}
}
+ if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
}
}
@@ -934,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
- struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+ struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
int reord = tp->packets_out;
int prior_fackets;
@@ -2238,13 +2239,12 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
-static u32 tcp_usrtt(const struct sk_buff *skb)
+static u32 tcp_usrtt(struct timeval *tv)
{
- struct timeval tv, now;
+ struct timeval now;
do_gettimeofday(&now);
- skb_get_timestamp(skb, &tv);
- return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+ return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
}
/* Remove acknowledged frames from the retransmission queue. */
@@ -2259,6 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
u32 pkts_acked = 0;
void (*rtt_sample)(struct sock *sk, u32 usrtt)
= icsk->icsk_ca_ops->rtt_sample;
+ struct timeval tv;
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
@@ -2307,8 +2308,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
seq_rtt = -1;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- if (rtt_sample)
- (*rtt_sample)(sk, tcp_usrtt(skb));
+ skb_get_timestamp(skb, &tv);
}
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2321,8 +2321,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- if (rtt_sample)
- (*rtt_sample)(sk, tcp_usrtt(skb));
+ skb_get_timestamp(skb, &tv);
}
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
@@ -2334,6 +2333,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
if (acked&FLAG_ACKED) {
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk, tp);
+ if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
+ (*rtt_sample)(sk, tcp_usrtt(&tv));
if (icsk->icsk_ca_ops->pkts_acked)
icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
@@ -2506,8 +2507,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una))
goto old_ack;
- if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
- tp->bytes_acked += ack - prior_snd_una;
+ if (sysctl_tcp_abc) {
+ if (icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+ else if (icsk->icsk_ca_state == TCP_CA_Loss)
+ /* we assume just one segment left network */
+ tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+ }
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
@@ -2623,7 +2629,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
switch(opcode) {
case TCPOPT_MSS:
if(opsize==TCPOLEN_MSS && th->syn && !estab) {
- u16 in_mss = ntohs(get_unaligned((__u16 *)ptr));
+ u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
if (in_mss) {
if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
in_mss = opt_rx->user_mss;
@@ -2651,8 +2657,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
if ((estab && opt_rx->tstamp_ok) ||
(!estab && sysctl_tcp_timestamps)) {
opt_rx->saw_tstamp = 1;
- opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr));
- opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4)));
+ opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+ opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
}
}
break;
@@ -2689,8 +2695,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
return 0;
} else if (tp->rx_opt.tstamp_ok &&
th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
- __u32 *ptr = (__u32 *)(th + 1);
- if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+ __be32 *ptr = (__be32 *)(th + 1);
+ if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
tp->rx_opt.saw_tstamp = 1;
++ptr;
@@ -3542,7 +3548,8 @@ void tcp_cwnd_application_limited(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
- u32 win_used = max(tp->snd_cwnd_used, 2U);
+ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ u32 win_used = max(tp->snd_cwnd_used, init_win);
if (win_used < tp->snd_cwnd) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
@@ -3904,10 +3911,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
/* Check timestamp */
if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
- __u32 *ptr = (__u32 *)(th + 1);
+ __be32 *ptr = (__be32 *)(th + 1);
/* No? Slow path! */
- if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+ if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
goto slow_path;
@@ -4178,8 +4185,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/
TCP_ECN_rcv_synack(tp, th);
- if (tp->ecn_flags&TCP_ECN_OK)
- sock_set_flag(sk, SOCK_NO_LARGESEND);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH);
@@ -4322,8 +4327,6 @@ discard:
tp->max_window = tp->snd_wnd;
TCP_ECN_rcv_syn(tp, th);
- if (tp->ecn_flags&TCP_ECN_OK)
- sock_set_flag(sk, SOCK_NO_LARGESEND);
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 25ecc6e2478b..c83938b8fcb1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -52,7 +52,6 @@
* a single port at the same time.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/fcntl.h>
@@ -79,8 +78,8 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8
@@ -91,7 +90,7 @@ static struct socket *tcp_socket;
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
- .lhash_lock = RW_LOCK_UNLOCKED,
+ .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
@@ -160,7 +159,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
- u32 daddr, nexthop;
+ __be32 daddr, nexthop;
int tmp;
int err;
@@ -242,6 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
goto failure;
/* OK, now commit destination to socket. */
+ sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->u.dst);
if (!tp->write_seq)
@@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
It can f.e. if SYNs crossed.
*/
if (!sock_owned_by_user(sk)) {
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
@@ -485,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
struct inet_sock *inet = inet_sk(sk);
struct tcphdr *th = skb->h.th;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
skb->csum = offsetof(struct tcphdr, check);
} else {
@@ -496,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
}
}
+int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ iph = skb->nh.iph;
+ th = skb->h.th;
+
+ th->check = 0;
+ th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
+ skb->csum = offsetof(struct tcphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
+
/*
* This routine will send an RST to the other tcp.
*
@@ -717,8 +734,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct inet_request_sock *ireq;
struct tcp_options_received tmp_opt;
struct request_sock *req;
- __u32 saddr = skb->nh.iph->saddr;
- __u32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = skb->nh.iph->saddr;
+ __be32 daddr = skb->nh.iph->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
@@ -781,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
ireq = inet_rsk(req);
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
@@ -856,7 +876,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
drop_and_free:
reqsk_free(req);
drop:
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
return 0;
}
@@ -884,6 +903,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (!newsk)
goto exit;
+ newsk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(newsk, dst);
newtp = tcp_sk(newsk);
@@ -931,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
- th->source, skb->nh.iph->daddr,
- ntohs(th->dest), inet_iif(skb));
+ nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+ th->source, skb->nh.iph->daddr,
+ th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -953,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
static int tcp_v4_checksum_init(struct sk_buff *skb)
{
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
skb->nh.iph->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1070,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
- skb->nh.iph->daddr, ntohs(th->dest),
+ skb->nh.iph->daddr, th->dest,
inet_iif(skb));
if (!sk)
@@ -1084,12 +1104,12 @@ process:
goto discard_and_relse;
nf_reset(skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
@@ -1148,7 +1168,7 @@ do_time_wait:
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
skb->nh.iph->daddr,
- ntohs(th->dest),
+ th->dest,
inet_iif(skb));
if (sk2) {
inet_twsk_deschedule((struct inet_timewait_sock *)sk,
@@ -1621,10 +1641,9 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
if (unlikely(afinfo == NULL))
return -EINVAL;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
- memset(s, 0, sizeof(*s));
s->family = afinfo->family;
s->seq_ops.start = tcp_seq_start;
s->seq_ops.next = tcp_seq_next;
@@ -1726,7 +1745,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5d %8d %lu %d %p %u %u %u %u %d",
i, src, srcp, dest, destp, sp->sk_state,
- tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
+ tp->write_seq - tp->snd_una,
+ (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
timer_active,
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1743,7 +1763,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
{
- unsigned int dest, src;
+ __be32 dest, src;
__u16 destp, srcp;
int ttd = tw->tw_ttd - jiffies;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 1f977b6ee9a1..f0ebaf0e21cb 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -3,13 +3,8 @@
*
* TCP Low Priority is a distributed algorithm whose goal is to utilize only
* the excess network bandwidth as compared to the ``fair share`` of
- * bandwidth as targeted by TCP. Available from:
- * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
+ * bandwidth as targeted by TCP.
*
- * Original Author:
- * Aleksandar Kuzmanovic <akuzma@northwestern.edu>
- *
- * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation.
* As of 2.6.13, Linux supports pluggable congestion control algorithms.
* Due to the limitation of the API, we take the following changes from
* the original TCP-LP implementation:
@@ -24,14 +19,20 @@
* o OWD is handled in relative format, where local time stamp will in
* tcp_time_stamp format.
*
- * Port from 2.4.19 to 2.6.16 as module by:
- * Wong Hoi Sing Edison <hswong3i@gmail.com>
- * Hung Hing Lun <hlhung3i@gmail.com>
+ * Original Author:
+ * Aleksandar Kuzmanovic <akuzma@northwestern.edu>
+ * Available from:
+ * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
+ * Original implementation for 2.4.19:
+ * http://www-ece.rice.edu/networks/TCP-LP/
*
- * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $
+ * 2.6.x module Authors:
+ * Wong Hoi Sing, Edison <hswong3i@gmail.com>
+ * Hung Hing Lun, Mike <hlhung3i@gmail.com>
+ * SourceForge project page:
+ * http://tcp-lp-mod.sourceforge.net/
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -153,16 +154,19 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
if (m < 0)
m = -m;
- if (rhz != 0) {
+ if (rhz > 0) {
m -= rhz >> 6; /* m is now error in remote HZ est */
rhz += m; /* 63/64 old + 1/64 new */
} else
rhz = m << 6;
+ out:
/* record time for successful remote HZ calc */
- lp->flag |= LP_VALID_RHZ;
+ if ((rhz >> 6) > 0)
+ lp->flag |= LP_VALID_RHZ;
+ else
+ lp->flag &= ~LP_VALID_RHZ;
- out:
/* record reference time stamp */
lp->remote_ref_time = tp->rx_opt.rcv_tsval;
lp->local_ref_time = tp->rx_opt.rcv_tsecr;
@@ -321,7 +325,7 @@ static struct tcp_congestion_ops tcp_lp = {
static int __init tcp_lp_register(void)
{
- BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_lp);
}
@@ -333,6 +337,6 @@ static void __exit tcp_lp_unregister(void)
module_init(tcp_lp_register);
module_exit(tcp_lp_unregister);
-MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun");
+MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Low Priority");
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2b9b7f6c7f7c..0163d9826907 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -20,7 +20,6 @@
* Jorge Cwik, <jorge@laser.satlink.net>
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
@@ -35,13 +34,13 @@
#define SYNC_INIT 1
#endif
-int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
.period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = SPIN_LOCK_UNLOCKED,
+ .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
.hashinfo = &tcp_hashinfo,
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&tcp_death_row),
@@ -440,8 +439,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
- if (newtp->ecn_flags&TCP_ECN_OK)
- sock_set_flag(newsk, SOCK_NO_LARGESEND);
TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
}
@@ -592,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* RFC793: "second check the RST bit" and
* "fourth, check the SYN bit"
*/
- if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
+ }
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdd71db8bf90..9a253faefc81 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
#include <linux/smp_lock.h>
/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
/* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity.
*/
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
static void update_send_head(struct sock *sk, struct tcp_sock *tp,
struct sk_buff *skb)
@@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* See RFC1323 for an explanation of the limit to 14
*/
space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
(*rcv_wscale)++;
@@ -268,7 +269,7 @@ static u16 tcp_select_window(struct sock *sk)
return new_win;
}
-static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
+static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
__u32 tstamp)
{
if (tp->rx_opt.tstamp_ok) {
@@ -304,7 +305,7 @@ static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
* MAX_SYN_SIZE to match the new maximum number of options that you
* can generate.
*/
-static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
+static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
int offer_wscale, int wscale, __u32 tstamp,
__u32 ts_recent)
{
@@ -423,7 +424,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->dest = inet->dport;
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
- *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
+ *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->flags);
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
@@ -444,7 +445,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_syn_build_options((__u32 *)(th + 1),
+ tcp_syn_build_options((__be32 *)(th + 1),
tcp_advertise_mss(sk),
(sysctl_flags & SYSCTL_FLAG_TSTAMPS),
(sysctl_flags & SYSCTL_FLAG_SACK),
@@ -453,7 +454,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcb->when,
tp->rx_opt.ts_recent);
} else {
- tcp_build_and_update_options((__u32 *)(th + 1),
+ tcp_build_and_update_options((__be32 *)(th + 1),
tp, tcb->when);
TCP_ECN_send(sk, tp, skb, tcp_header_size);
}
@@ -466,7 +467,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (skb->len != tcp_header_size)
tcp_event_data_sent(tp, skb, sk);
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
+ TCP_INC_STATS(TCP_MIB_OUTSEGS);
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
if (likely(err <= 0))
@@ -510,8 +512,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now ||
- !(sk->sk_route_caps & NETIF_F_TSO)) {
+ if (skb->len <= mss_now || !sk_can_gso(sk)) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -525,7 +526,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
factor /= mss_now;
skb_shinfo(skb)->gso_segs = factor;
skb_shinfo(skb)->gso_size = mss_now;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type = sk->sk_gso_type;
}
}
@@ -576,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+ if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
/* Copy and checksum data tail into the new buffer. */
buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
nsize, 0);
@@ -585,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
skb->csum = csum_block_sub(skb->csum, buff->csum, len);
} else {
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
}
@@ -688,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
__pskb_trim_head(skb, len - skb_headlen(skb));
TCP_SKB_CB(skb)->seq += len;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb->truesize -= len;
sk->sk_wmem_queued -= len;
@@ -824,9 +825,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now = tp->mss_cache;
- if (large_allowed &&
- (sk->sk_route_caps & NETIF_F_TSO) &&
- !tp->urg_mode)
+ if (large_allowed && sk_can_gso(sk) && !tp->urg_mode)
doing_tso = 1;
if (dst) {
@@ -1063,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* This packet was never sent out yet, so no SACK bits. */
TCP_SKB_CB(buff)->sacked = 0;
- buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+ buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
/* Fix up tso_factor for both original and new SKB. */
@@ -1207,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- if (skb->ip_summed == CHECKSUM_HW)
- nskb->ip_summed = CHECKSUM_HW;
+ nskb->ip_summed = skb->ip_summed;
len = 0;
while (len < probe_size) {
@@ -1232,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->csum = csum_partial(skb->data, skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
@@ -1573,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
- if (next_skb->ip_summed == CHECKSUM_HW)
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = next_skb->ip_summed;
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
/* Update sequence range on original skb. */
@@ -2044,8 +2041,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- if (dst->dev->features&NETIF_F_TSO)
- ireq->ecn_ok = 0;
TCP_ECN_make_synack(req, th);
th->source = inet_sk(sk)->sport;
th->dest = ireq->rmt_port;
@@ -2075,7 +2070,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->window = htons(req->rcv_wnd);
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
+ tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
TCP_SKB_CB(skb)->when,
req->ts_recent);
@@ -2162,10 +2157,9 @@ int tcp_connect(struct sock *sk)
skb_shinfo(buff)->gso_size = 0;
skb_shinfo(buff)->gso_type = 0;
buff->csum = 0;
+ tp->snd_nxt = tp->write_seq;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
- tp->snd_nxt = tp->write_seq;
- tp->pushed_seq = tp->write_seq;
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2175,6 +2169,12 @@ int tcp_connect(struct sock *sk)
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+
+ /* We change tp->snd_nxt after the tcp_transmit_skb() call
+ * in order to make this packet get counted in tcpOutSegs.
+ */
+ tp->snd_nxt = tp->write_seq;
+ tp->pushed_seq = tp->write_seq;
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d7d517a3a238..dab37d2f65fc 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file)
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
- int error = 0, cnt;
+ int error = 0, cnt = 0;
unsigned char *tbuf;
if (!buf || len < 0)
@@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
error = wait_event_interruptible(tcpw.wait,
__kfifo_len(tcpw.fifo) != 0);
if (error)
- return error;
+ goto out_free;
cnt = kfifo_get(tcpw.fifo, tbuf, len);
error = copy_to_user(buf, tbuf, cnt);
+out_free:
vfree(tbuf);
return error ? error : cnt;
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 26d7486ee501..4624501e9680 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -5,7 +5,6 @@
* John Heffner <jheffner@sc.edu>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3cd6cb..fb09ade5897b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
#include <linux/module.h>
#include <net/tcp.h>
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 3b7403495052..a3b7aa015a2f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -31,7 +31,6 @@
* assumed senders never went idle.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -371,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = {
static int __init tcp_vegas_register(void)
{
- BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_vegas);
return 0;
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7135c1..ce57bf302f6c 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
* See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = {
static int __init tcp_veno_register(void)
{
- BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_veno);
return 0;
}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4247da1384bf..4f42a86c77f3 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -21,7 +21,6 @@
* ssthresh after packet loss. The probing phase is as the original Reno.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -290,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = {
static int __init tcp_westwood_register(void)
{
- BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_westwood);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3f93292b0ad8..6d6142f9c478 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -91,7 +91,6 @@
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
-#include <linux/config.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
@@ -119,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
DEFINE_RWLOCK(udp_hash_lock);
-/* Shared by v4/v6 udp. */
-int udp_port_rover;
+static int udp_port_rover;
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_lport_inuse(u16 num)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+
+ sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+ if (inet_sk(sk)->num == num)
+ return 1;
+ return 0;
+}
+
+/**
+ * udp_get_port - common port lookup for IPv4 and IPv6
+ *
+ * @sk: socket struct in question
+ * @snum: port number to look up
+ * @saddr_comp: AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
{
struct hlist_node *node;
+ struct hlist_head *head;
struct sock *sk2;
- struct inet_sock *inet = inet_sk(sk);
+ int error = 1;
write_lock_bh(&udp_hash_lock);
if (snum == 0) {
@@ -138,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
best_size_so_far = 32767;
best = result = udp_port_rover;
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
- struct hlist_head *list;
int size;
- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
- if (hlist_empty(list)) {
+ head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
((result - sysctl_local_port_range[0]) &
@@ -150,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
goto gotit;
}
size = 0;
- sk_for_each(sk2, node, list)
- if (++size >= best_size_so_far)
- goto next;
- best_size_so_far = size;
- best = result;
- next:;
+ sk_for_each(sk2, node, head)
+ if (++size < best_size_so_far) {
+ best_size_so_far = size;
+ best = result;
+ }
}
result = best;
for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -171,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
gotit:
udp_port_rover = snum = result;
} else {
- sk_for_each(sk2, node,
- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
- struct inet_sock *inet2 = inet_sk(sk2);
-
- if (inet2->num == snum &&
- sk2 != sk &&
- !ipv6_only_sock(sk2) &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!inet2->rcv_saddr ||
- !inet->rcv_saddr ||
- inet2->rcv_saddr == inet->rcv_saddr) &&
- (!sk2->sk_reuse || !sk->sk_reuse))
+ head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+
+ sk_for_each(sk2, node, head)
+ if (inet_sk(sk2)->num == snum &&
+ sk2 != sk &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_cmp)(sk, sk2) )
goto fail;
- }
}
- inet->num = snum;
+ inet_sk(sk)->num = snum;
if (sk_unhashed(sk)) {
- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-
- sk_add_node(sk, h);
+ head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
- write_unlock_bh(&udp_hash_lock);
- return 0;
-
+ error = 0;
fail:
write_unlock_bh(&udp_hash_lock);
- return 1;
+ return error;
+}
+
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+{
+ struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+ return ( !ipv6_only_sock(sk2) &&
+ (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+ inet1->rcv_saddr == inet2->rcv_saddr ));
}
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+ return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
+
static void udp_v4_hash(struct sock *sk)
{
BUG();
@@ -221,8 +243,8 @@ static void udp_v4_unhash(struct sock *sk)
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport,
- u32 daddr, u16 dport, int dif)
+static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport, int dif)
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
@@ -266,8 +288,8 @@ static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport,
return result;
}
-static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport,
- u32 daddr, u16 dport, int dif)
+static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -280,8 +302,8 @@ static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport,
}
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
- u16 loc_port, u32 loc_addr,
- u16 rmt_port, u32 rmt_addr,
+ __be16 loc_port, __be32 loc_addr,
+ __be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
@@ -430,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
/*
* Only one fragment on the socket.
*/
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb->csum = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
up->len, IPPROTO_UDP, 0);
@@ -449,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
* fragments on the socket so that all csums of sk_buffs
* should be together.
*/
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
int offset = (unsigned char *)uh - skb->data;
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -476,7 +498,7 @@ out:
}
-static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
+static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base)
{
return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
}
@@ -491,8 +513,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct rtable *rt = NULL;
int free = 0;
int connected = 0;
- u32 daddr, faddr, saddr;
- u16 dport;
+ __be32 daddr, faddr, saddr;
+ __be16 dport;
u8 tos;
int err;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
@@ -604,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
if (err)
goto out;
@@ -662,6 +685,16 @@ out:
UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
return len;
}
+ /*
+ * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
+ * ENOBUFS might not be good (it's not tunable per se), but otherwise
+ * we don't have a good statistic (IpOutDiscards but it can be too many
+ * things). We could add another new stat but at least for now that
+ * seems like overkill.
+ */
+ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+ }
return err;
do_confirm:
@@ -898,7 +931,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
int iphlen, len;
__u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
- __u32 *udpdata32 = (__u32 *)udpdata;
+ __be32 *udpdata32 = (__be32 *)udpdata;
__u16 encap_type = up->encap_type;
/* if we're overly short, let UDP handle it */
@@ -981,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
+ int rc;
/*
* Charge it to the socket, dropping if the queue is full.
@@ -1027,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (sock_queue_rcv_skb(sk,skb)<0) {
+ if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+ /* Note that an ENOMEM error is charged twice */
+ if (rc == -ENOMEM)
+ UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
return -1;
@@ -1043,7 +1080,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
* so we don't need to lock the hashes.
*/
static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
- u32 saddr, u32 daddr)
+ __be32 saddr, __be32 daddr)
{
struct sock *sk;
int dif;
@@ -1084,11 +1121,11 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
* including udp header and folding it to skb->csum.
*/
static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, u32 saddr, u32 daddr)
+ unsigned short ulen, __be32 saddr, __be32 daddr)
{
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else if (skb->ip_summed == CHECKSUM_HW) {
+ } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -1109,8 +1146,8 @@ int udp_rcv(struct sk_buff *skb)
struct udphdr *uh;
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
- u32 saddr = skb->nh.iph->saddr;
- u32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = skb->nh.iph->saddr;
+ __be32 daddr = skb->nh.iph->daddr;
int len = skb->len;
/*
@@ -1469,11 +1506,10 @@ static int udp_seq_open(struct inode *inode, struct file *file)
struct udp_seq_afinfo *afinfo = PDE(inode)->data;
struct seq_file *seq;
int rc = -ENOMEM;
- struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
- memset(s, 0, sizeof(*s));
s->family = afinfo->family;
s->seq_ops.start = udp_seq_start;
s->seq_ops.next = udp_seq_next;
@@ -1527,8 +1563,8 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
{
struct inet_sock *inet = inet_sk(sp);
- unsigned int dest = inet->daddr;
- unsigned int src = inet->rcv_saddr;
+ __be32 dest = inet->daddr;
+ __be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
__u16 srcp = ntohs(inet->sport);
@@ -1583,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84511a6..8655d038364c 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,7 +23,7 @@ int xfrm4_rcv(struct sk_buff *skb)
EXPORT_SYMBOL(xfrm4_rcv);
-static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
switch (nexthdr) {
case IPPROTO_IPIP:
@@ -55,7 +55,7 @@ drop:
int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
int err;
- u32 spi, seq;
+ __be32 spi, seq;
struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
if (x->mode->input(x, skb))
goto drop;
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
decaps = 1;
break;
}
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3dd19c9..92676b7e4034 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
* On exit, skb->h will be set to the start of the payload to be processed
* by x->type->output and skb->nh will be set to the top IP header.
*/
-static int xfrm4_transport_output(struct sk_buff *skb)
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_state *x;
struct iphdr *iph;
int ihl;
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb)
ihl = iph->ihl * 4;
skb->h.raw += ihl;
- x = skb->dst->xfrm;
skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
return 0;
}
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f8d880beb12f..e23c21d31a53 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
* On exit, skb->h will be set to the start of the payload to be processed
* by x->type->output and skb->nh will be set to the top IP header.
*/
-static int xfrm4_tunnel_output(struct sk_buff *skb)
+static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct xfrm_state *x = dst->xfrm;
struct iphdr *iph, *top_iph;
int flags;
@@ -92,7 +91,6 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb->mac.raw = memmove(skb->data - skb->mac_len,
skb->mac.raw, skb->mac_len);
skb->nh.raw = skb->data;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
err = 0;
out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 193363e22932..04403fb01a58 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int err;
- if (skb->ip_summed == CHECKSUM_HW) {
- err = skb_checksum_help(skb, 0);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ err = skb_checksum_help(skb);
if (err)
goto error_nolock;
}
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = xfrm4_tunnel_check_size(skb);
if (err)
goto error_nolock;
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
if (err)
goto error;
- err = x->mode->output(skb);
+ err = x->mode->output(x, skb);
if (err)
goto error;
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
}
dst = skb->dst;
x = dst->xfrm;
- } while (x && !x->props.mode);
+ } while (x && (x->props.mode != XFRM_MODE_TUNNEL));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
err = 0;
@@ -134,7 +134,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
}
#endif
- if (!skb_shinfo(skb)->gso_size)
+ if (!skb_is_gso(skb))
return xfrm4_output_finish2(skb);
skb->protocol = htons(ETH_P_IP);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c0465284dfac..7a7a00147e55 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -9,7 +9,6 @@
*/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/inetdevice.h>
#include <net/xfrm.h>
#include <net/ip.h>
@@ -22,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
return __ip_route_output_key((struct rtable**)dst, fl);
}
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+ struct rtable *rt;
+ struct flowi fl_tunnel = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = daddr->a4,
+ },
+ },
+ };
+
+ if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+ saddr->a4 = rt->rt_src;
+ dst_release(&rt->u.dst);
+ return 0;
+ }
+ return -EHOSTUNREACH;
+}
+
static struct dst_entry *
__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
{
@@ -34,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
xdst->u.rt.fl.fl4_src == fl->fl4_src &&
xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
- xfrm_bundle_ok(xdst, fl, AF_INET)) {
+ xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
dst_clone(dst);
break;
}
@@ -94,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
xdst = (struct xfrm_dst *)dst1;
xdst->route = &rt->u.dst;
+ xdst->genid = xfrm[i]->genid;
dst1->next = dst_prev;
dst_prev = dst1;
- if (xfrm[i]->props.mode) {
+ if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
remote = xfrm[i]->id.daddr.a4;
local = xfrm[i]->props.saddr.a4;
tunnel = 1;
@@ -136,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
dst_prev->flags |= DST_HOST;
dst_prev->lastuse = jiffies;
dst_prev->header_len = header_len;
+ dst_prev->nfheader_len = 0;
dst_prev->trailer_len = trailer_len;
memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
@@ -201,7 +221,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_ESP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
- u32 *ehdr = (u32 *)xprth;
+ __be32 *ehdr = (__be32 *)xprth;
fl->fl_ipsec_spi = ehdr[0];
}
@@ -209,7 +229,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_AH:
if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
- u32 *ah_hdr = (u32*)xprth;
+ __be32 *ah_hdr = (__be32*)xprth;
fl->fl_ipsec_spi = ah_hdr[1];
}
@@ -217,7 +237,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_COMP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
- u16 *ipcomp_hdr = (u16 *)xprth;
+ __be16 *ipcomp_hdr = (__be16 *)xprth;
fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
}
@@ -297,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
.dst_ops = &xfrm4_dst_ops,
.dst_lookup = xfrm4_dst_lookup,
+ .get_saddr = xfrm4_get_saddr,
.find_bundle = __xfrm4_find_bundle,
.bundle_create = __xfrm4_bundle_create,
.decode_session = _decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751c966e..3cc3df0c6ece 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -29,9 +29,9 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->sel.daddr.a4 = fl->fl4_dst;
x->sel.saddr.a4 = fl->fl4_src;
x->sel.dport = xfrm_flowi_dport(fl);
- x->sel.dport_mask = ~0;
+ x->sel.dport_mask = htons(0xffff);
x->sel.sport = xfrm_flowi_sport(fl);
- x->sel.sport_mask = ~0;
+ x->sel.sport_mask = htons(0xffff);
x->sel.prefixlen_d = 32;
x->sel.prefixlen_s = 32;
x->sel.proto = fl->proto;
@@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->props.saddr = tmpl->saddr;
if (x->props.saddr.a4 == 0)
x->props.saddr.a4 = saddr->a4;
- if (tmpl->mode && x->props.saddr.a4 == 0) {
- struct rtable *rt;
- struct flowi fl_tunnel = {
- .nl_u = {
- .ip4_u = {
- .daddr = x->id.daddr.a4,
- }
- }
- };
- if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
- &fl_tunnel, AF_INET)) {
- x->props.saddr.a4 = rt->rt_src;
- dst_release(&rt->u.dst);
- }
- }
x->props.mode = tmpl->mode;
x->props.reqid = tmpl->reqid;
x->props.family = AF_INET;
}
-static struct xfrm_state *
-__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
- unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
- struct xfrm_state *x;
-
- list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
- if (x->props.family == AF_INET &&
- spi == x->id.spi &&
- daddr->a4 == x->id.daddr.a4 &&
- proto == x->id.proto) {
- xfrm_state_hold(x);
- return x;
- }
- }
- return NULL;
-}
-
-static struct xfrm_state *
-__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
- xfrm_address_t *daddr, xfrm_address_t *saddr,
- int create)
-{
- struct xfrm_state *x, *x0;
- unsigned h = __xfrm4_dst_hash(daddr);
-
- x0 = NULL;
-
- list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
- if (x->props.family == AF_INET &&
- daddr->a4 == x->id.daddr.a4 &&
- mode == x->props.mode &&
- proto == x->id.proto &&
- saddr->a4 == x->props.saddr.a4 &&
- reqid == x->props.reqid &&
- x->km.state == XFRM_STATE_ACQ &&
- !x->id.spi) {
- x0 = x;
- break;
- }
- }
- if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
- x0->sel.daddr.a4 = daddr->a4;
- x0->sel.saddr.a4 = saddr->a4;
- x0->sel.prefixlen_d = 32;
- x0->sel.prefixlen_s = 32;
- x0->props.saddr.a4 = saddr->a4;
- x0->km.state = XFRM_STATE_ACQ;
- x0->id.daddr.a4 = daddr->a4;
- x0->id.proto = proto;
- x0->props.family = AF_INET;
- x0->props.mode = mode;
- x0->props.reqid = reqid;
- x0->props.family = AF_INET;
- x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
- xfrm_state_hold(x0);
- x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
- add_timer(&x0->timer);
- xfrm_state_hold(x0);
- list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
- wake_up(&km_waitq);
- }
- if (x0)
- xfrm_state_hold(x0);
- return x0;
-}
-
static struct xfrm_state_afinfo xfrm4_state_afinfo = {
.family = AF_INET,
.init_flags = xfrm4_init_flags,
.init_tempsel = __xfrm4_init_tempsel,
- .state_lookup = __xfrm4_state_lookup,
- .find_acq = __xfrm4_find_acq,
};
void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa127c83..f110af5b1319 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
static int ipip_init_state(struct xfrm_state *x)
{
- if (!x->props.mode)
+ if (x->props.mode != XFRM_MODE_TUNNEL)
return -EINVAL;
if (x->encap)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e923d4dea418..a2d211da2aba 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -77,6 +77,7 @@ config INET6_ESP
select CRYPTO
select CRYPTO_HMAC
select CRYPTO_MD5
+ select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
---help---
@@ -97,6 +98,15 @@ config INET6_IPCOMP
If unsure, say Y.
+config IPV6_MIP6
+ bool "IPv6: Mobility (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ select XFRM
+ ---help---
+ Support for IPv6 Mobility described in RFC 3775.
+
+ If unsure, say N.
+
config INET6_XFRM_TUNNEL
tristate
select INET6_TUNNEL
@@ -126,6 +136,13 @@ config INET6_XFRM_MODE_TUNNEL
If unsure, say Y.
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+ tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ select XFRM
+ ---help---
+ Support for MIPv6 route optimization mode.
+
config IPV6_TUNNEL
tristate "IPv6: IPv6-in-IPv6 tunnel"
select INET6_TUNNEL
@@ -135,3 +152,31 @@ config IPV6_TUNNEL
If unsure, say N.
+config IPV6_SUBTREES
+ bool "IPv6: source address based routing"
+ depends on IPV6 && EXPERIMENTAL
+ ---help---
+ Enable routing by source address or prefix.
+
+ The destination address is still the primary routing key, so mixing
+ normal and source prefix specific routes in the same routing table
+ may sometimes lead to unintended routing behavior. This can be
+ avoided by defining different routing tables for the normal and
+ source prefix specific routes.
+
+ If unsure, say N.
+
+config IPV6_MULTIPLE_TABLES
+ bool "IPv6: Multiple Routing Tables"
+ depends on IPV6 && EXPERIMENTAL
+ select FIB_RULES
+ ---help---
+ Support multiple routing tables.
+
+config IPV6_ROUTE_FWMARK
+ bool "IPv6: use netfilter MARK value as routing key"
+ depends on IPV6_MULTIPLE_TABLES && NETFILTER
+ ---help---
+ If you say Y here, you will be able to specify different routes for
+ packets with different mark values (see iptables(8), MARK target).
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 386e0a626948..0213c6612b58 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -13,6 +13,9 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
xfrm6_output.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+
ipv6-objs += $(ipv6-y)
obj-$(CONFIG_INET6_AH) += ah6.o
@@ -22,6 +25,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4da664538f52..e03c33b2465b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -40,7 +40,6 @@
* status etc.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -49,6 +48,7 @@
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/if_arcnet.h>
#include <linux/if_infiniband.h>
@@ -73,6 +73,7 @@
#include <net/addrconf.h>
#include <net/tcp.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
@@ -118,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_RWLOCK(addrconf_hash_lock);
-/* Protects inet6 devices */
-DEFINE_RWLOCK(addrconf_lock);
-
static void addrconf_verify(unsigned long);
static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
@@ -145,7 +143,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf ipv6_devconf __read_mostly = {
.forwarding = 0,
.hop_limit = IPV6_DEFAULT_HOPLIMIT,
.mtu6 = IPV6_MIN_MTU,
@@ -174,9 +172,10 @@ struct ipv6_devconf ipv6_devconf = {
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
+ .proxy_ndp = 0,
};
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.forwarding = 0,
.hop_limit = IPV6_DEFAULT_HOPLIMIT,
.mtu6 = IPV6_MIN_MTU,
@@ -204,6 +203,7 @@ static struct ipv6_devconf ipv6_devconf_dflt = {
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
+ .proxy_ndp = 0,
};
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -315,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
/* Nobody refers to this device, we may destroy it. */
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+ struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+ kfree(idev);
+}
+
void in6_dev_finish_destroy(struct inet6_dev *idev)
{
struct net_device *dev = idev->dev;
@@ -329,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
return;
}
snmp6_free_dev(idev);
- kfree(idev);
+ call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
}
static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -405,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
if (netif_carrier_ok(dev))
ndev->if_flags |= IF_READY;
- write_lock_bh(&addrconf_lock);
- dev->ip6_ptr = ndev;
- write_unlock_bh(&addrconf_lock);
+ /* protected by rtnl_lock */
+ rcu_assign_pointer(dev->ip6_ptr, ndev);
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
@@ -471,7 +476,7 @@ static void addrconf_forward_change(void)
read_lock(&dev_base_lock);
for (dev=dev_base; dev; dev=dev->next) {
- read_lock(&addrconf_lock);
+ rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
@@ -479,7 +484,7 @@ static void addrconf_forward_change(void)
if (changed)
dev_forward_change(idev);
}
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
}
read_unlock(&dev_base_lock);
}
@@ -509,6 +514,26 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
kfree(ifp);
}
+static void
+ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ifa, **ifap;
+ int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
+
+ /*
+ * Each device address list is sorted in order of scope -
+ * global before linklocal.
+ */
+ for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
+ ifap = &ifa->if_next) {
+ if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
+ break;
+ }
+
+ ifp->if_next = *ifap;
+ *ifap = ifp;
+}
+
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
@@ -520,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
int hash;
int err = 0;
- read_lock_bh(&addrconf_lock);
+ rcu_read_lock_bh();
if (idev->dead) {
err = -ENODEV; /*XXX*/
goto out2;
@@ -559,6 +584,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->flags = flags | IFA_F_TENTATIVE;
ifa->cstamp = ifa->tstamp = jiffies;
+ ifa->rt = rt;
+
ifa->idev = idev;
in6_dev_hold(idev);
/* For caller */
@@ -574,8 +601,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
write_lock(&idev->lock);
/* Add to inet6_dev unicast addr list. */
- ifa->if_next = idev->addr_list;
- idev->addr_list = ifa;
+ ipv6_link_dev_addr(idev, ifa);
#ifdef CONFIG_IPV6_PRIVACY
if (ifa->flags&IFA_F_TEMPORARY) {
@@ -585,12 +611,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
}
#endif
- ifa->rt = rt;
-
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
out2:
- read_unlock_bh(&addrconf_lock);
+ rcu_read_unlock_bh();
if (likely(err == 0))
atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
@@ -716,7 +740,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (onlink == 0) {
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
rt = NULL;
} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
rt->rt6i_expires = expires;
@@ -893,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
memset(&hiscore, 0, sizeof(hiscore));
read_lock(&dev_base_lock);
- read_lock(&addrconf_lock);
+ rcu_read_lock();
for (dev = dev_base; dev; dev=dev->next) {
struct inet6_dev *idev;
@@ -988,7 +1012,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
continue;
} else if (score.scope < hiscore.scope) {
if (score.scope < daddr_scope)
- continue;
+ break; /* addresses sorted by scope */
else {
score.rule = 2;
goto record_it;
@@ -1014,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
continue;
}
- /* Rule 4: Prefer home address -- not implemented yet */
+ /* Rule 4: Prefer home address */
+#ifdef CONFIG_IPV6_MIP6
+ if (hiscore.rule < 4) {
+ if (ifa_result->flags & IFA_F_HOMEADDRESS)
+ hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+ hiscore.rule++;
+ }
+ if (ifa->flags & IFA_F_HOMEADDRESS) {
+ score.attrs |= IPV6_SADDR_SCORE_HOA;
+ if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
+ score.rule = 4;
+ goto record_it;
+ }
+ } else {
+ if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
+ continue;
+ }
+#else
if (hiscore.rule < 4)
hiscore.rule++;
+#endif
/* Rule 5: Prefer outgoing interface */
if (hiscore.rule < 5) {
@@ -1105,7 +1147,7 @@ record_it:
}
read_unlock_bh(&idev->lock);
}
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
read_unlock(&dev_base_lock);
if (!ifa_result)
@@ -1129,7 +1171,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
struct inet6_dev *idev;
int err = -EADDRNOTAVAIL;
- read_lock(&addrconf_lock);
+ rcu_read_lock();
if ((idev = __in6_dev_get(dev)) != NULL) {
struct inet6_ifaddr *ifp;
@@ -1143,7 +1185,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
}
read_unlock_bh(&idev->lock);
}
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
return err;
}
@@ -1216,8 +1258,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
- u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+ __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+ __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(sk_rcv_saddr6);
@@ -1444,7 +1486,7 @@ static void ipv6_regen_rndid(unsigned long data)
struct inet6_dev *idev = (struct inet6_dev *) data;
unsigned long expires;
- read_lock_bh(&addrconf_lock);
+ rcu_read_lock_bh();
write_lock_bh(&idev->lock);
if (idev->dead)
@@ -1468,7 +1510,7 @@ static void ipv6_regen_rndid(unsigned long data)
out:
write_unlock_bh(&idev->lock);
- read_unlock_bh(&addrconf_lock);
+ rcu_read_unlock_bh();
in6_dev_put(idev);
}
@@ -1489,59 +1531,56 @@ static void
addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
unsigned long expires, u32 flags)
{
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_PREFIX,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_expires = expires,
+ .fc_dst_len = plen,
+ .fc_flags = RTF_UP | flags,
+ };
- memset(&rtmsg, 0, sizeof(rtmsg));
- ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
- rtmsg.rtmsg_dst_len = plen;
- rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
- rtmsg.rtmsg_ifindex = dev->ifindex;
- rtmsg.rtmsg_info = expires;
- rtmsg.rtmsg_flags = RTF_UP|flags;
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+ ipv6_addr_copy(&cfg.fc_dst, pfx);
/* Prevent useless cloning on PtP SIT.
This thing is done here expecting that the whole
class of non-broadcast devices need not cloning.
*/
- if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
- rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+ if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+ cfg.fc_flags |= RTF_NONEXTHOP;
- ip6_route_add(&rtmsg, NULL, NULL, NULL);
+ ip6_route_add(&cfg);
}
/* Create "default" multicast route to the interface */
static void addrconf_add_mroute(struct net_device *dev)
{
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_LOCAL,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_dst_len = 8,
+ .fc_flags = RTF_UP,
+ };
+
+ ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- memset(&rtmsg, 0, sizeof(rtmsg));
- ipv6_addr_set(&rtmsg.rtmsg_dst,
- htonl(0xFF000000), 0, 0, 0);
- rtmsg.rtmsg_dst_len = 8;
- rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
- rtmsg.rtmsg_ifindex = dev->ifindex;
- rtmsg.rtmsg_flags = RTF_UP;
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
- ip6_route_add(&rtmsg, NULL, NULL, NULL);
+ ip6_route_add(&cfg);
}
static void sit_route_add(struct net_device *dev)
{
- struct in6_rtmsg rtmsg;
-
- memset(&rtmsg, 0, sizeof(rtmsg));
-
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
- rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_MAIN,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_dst_len = 96,
+ .fc_flags = RTF_UP | RTF_NONEXTHOP,
+ };
/* prefix length - 96 bits "::d.d.d.d" */
- rtmsg.rtmsg_dst_len = 96;
- rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
- rtmsg.rtmsg_ifindex = dev->ifindex;
-
- ip6_route_add(&rtmsg, NULL, NULL, NULL);
+ ip6_route_add(&cfg);
}
static void addrconf_add_lroute(struct net_device *dev)
@@ -1642,7 +1681,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (rt->rt6i_flags&RTF_EXPIRES) {
if (valid_lft == 0) {
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
rt = NULL;
} else {
rt->rt6i_expires = jiffies + rt_expires;
@@ -1851,7 +1890,8 @@ err_exit:
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
+ __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -1860,21 +1900,41 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
ASSERT_RTNL();
+ /* check the lifetime */
+ if (!valid_lft || prefered_lft > valid_lft)
+ return -EINVAL;
+
if ((dev = __dev_get_by_index(ifindex)) == NULL)
return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
-
if ((idev = addrconf_add_dev(dev)) == NULL)
return -ENOBUFS;
scope = ipv6_addr_scope(pfx);
- ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT);
+ if (valid_lft == INFINITY_LIFE_TIME)
+ ifa_flags |= IFA_F_PERMANENT;
+ else if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+
+ if (prefered_lft == 0)
+ ifa_flags |= IFA_F_DEPRECATED;
+ else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
+ (prefered_lft != INFINITY_LIFE_TIME))
+ prefered_lft = 0x7FFFFFFF/HZ;
+
+ ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+
if (!IS_ERR(ifp)) {
+ spin_lock_bh(&ifp->lock);
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = jiffies;
+ spin_unlock_bh(&ifp->lock);
+
addrconf_dad_start(ifp, 0);
in6_ifa_put(ifp);
+ addrconf_verify(0);
return 0;
}
@@ -1927,7 +1987,8 @@ int addrconf_add_ifaddr(void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+ err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
+ IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
rtnl_unlock();
return err;
}
@@ -2300,10 +2361,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
Do not dev_put!
*/
if (how == 1) {
- write_lock_bh(&addrconf_lock);
- dev->ip6_ptr = NULL;
idev->dead = 1;
- write_unlock_bh(&addrconf_lock);
+
+ /* protected by rtnl_lock */
+ rcu_assign_pointer(dev->ip6_ptr, NULL);
/* Step 1.5: remove snmp6 entry */
snmp6_unregister_dev(idev);
@@ -2470,7 +2531,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
spin_lock_bh(&ifp->lock);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- !(ifp->flags&IFA_F_TENTATIVE)) {
+ !(ifp->flags&IFA_F_TENTATIVE) ||
+ ifp->flags & IFA_F_NODAD) {
ifp->flags &= ~IFA_F_TENTATIVE;
spin_unlock_bh(&ifp->lock);
read_unlock_bh(&idev->lock);
@@ -2715,6 +2777,26 @@ void if6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
+#ifdef CONFIG_IPV6_MIP6
+/* Check if address is a home address configured on any interface. */
+int ipv6_chk_home_addr(struct in6_addr *addr)
+{
+ int ret = 0;
+ struct inet6_ifaddr * ifp;
+ u8 hash = ipv6_addr_hash(addr);
+ read_lock_bh(&addrconf_hash_lock);
+ for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+ if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
+ (ifp->flags & IFA_F_HOMEADDRESS)) {
+ ret = 1;
+ break;
+ }
+ }
+ read_unlock_bh(&addrconf_hash_lock);
+ return ret;
+}
+#endif
+
/*
* Periodic address status verification
*/
@@ -2753,12 +2835,16 @@ restart:
ifp->idev->nd_parms->retrans_time / HZ;
#endif
- if (age >= ifp->valid_lft) {
+ if (ifp->valid_lft != INFINITY_LIFE_TIME &&
+ age >= ifp->valid_lft) {
spin_unlock(&ifp->lock);
in6_ifa_hold(ifp);
read_unlock(&addrconf_hash_lock);
ipv6_del_addr(ifp);
goto restart;
+ } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
+ spin_unlock(&ifp->lock);
+ continue;
} else if (age >= ifp->prefered_lft) {
/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
int deprecate = 0;
@@ -2821,178 +2907,267 @@ restart:
spin_unlock_bh(&addrconf_verify_lock);
}
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+{
+ struct in6_addr *pfx = NULL;
+
+ if (addr)
+ pfx = nla_data(addr);
+
+ if (local) {
+ if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+ pfx = NULL;
+ else
+ pfx = nla_data(local);
+ }
+
+ return pfx;
+}
+
+static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
+ [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
+ [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
+};
+
static int
inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx;
+ int err;
- pfx = NULL;
- if (rta[IFA_ADDRESS-1]) {
- if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
- return -EINVAL;
- pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
- }
- if (rta[IFA_LOCAL-1]) {
- if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
- return -EINVAL;
- pfx = RTA_DATA(rta[IFA_LOCAL-1]);
- }
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
if (pfx == NULL)
return -EINVAL;
return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
}
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+ u32 prefered_lft, u32 valid_lft)
+{
+ if (!valid_lft || (prefered_lft > valid_lft))
+ return -EINVAL;
+
+ if (valid_lft == INFINITY_LIFE_TIME)
+ ifa_flags |= IFA_F_PERMANENT;
+ else if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+
+ if (prefered_lft == 0)
+ ifa_flags |= IFA_F_DEPRECATED;
+ else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
+ (prefered_lft != INFINITY_LIFE_TIME))
+ prefered_lft = 0x7FFFFFFF/HZ;
+
+ spin_lock_bh(&ifp->lock);
+ ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
+ ifp->tstamp = jiffies;
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+
+ spin_unlock_bh(&ifp->lock);
+ if (!(ifp->flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+
+ addrconf_verify(0);
+
+ return 0;
+}
+
static int
inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx;
+ struct inet6_ifaddr *ifa;
+ struct net_device *dev;
+ u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+ u8 ifa_flags;
+ int err;
- pfx = NULL;
- if (rta[IFA_ADDRESS-1]) {
- if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
- return -EINVAL;
- pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
- }
- if (rta[IFA_LOCAL-1]) {
- if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
- return -EINVAL;
- pfx = RTA_DATA(rta[IFA_LOCAL-1]);
- }
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ if (tb[IFA_CACHEINFO]) {
+ struct ifa_cacheinfo *ci;
+
+ ci = nla_data(tb[IFA_CACHEINFO]);
+ valid_lft = ci->ifa_valid;
+ preferred_lft = ci->ifa_prefered;
+ } else {
+ preferred_lft = INFINITY_LIFE_TIME;
+ valid_lft = INFINITY_LIFE_TIME;
+ }
+
+ dev = __dev_get_by_index(ifm->ifa_index);
+ if (dev == NULL)
+ return -ENODEV;
+
+ /* We ignore other flags so far. */
+ ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+
+ ifa = ipv6_get_ifaddr(pfx, dev, 1);
+ if (ifa == NULL) {
+ /*
+ * It would be best to check for !NLM_F_CREATE here but
+ * userspace alreay relies on not having to provide this.
+ */
+ return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
+ ifa_flags, preferred_lft, valid_lft);
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL ||
+ !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ err = -EEXIST;
+ else
+ err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+
+ in6_ifa_put(ifa);
+
+ return err;
+}
+
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+ u8 scope, int ifindex)
+{
+ struct ifaddrmsg *ifm;
+
+ ifm = nlmsg_data(nlh);
+ ifm->ifa_family = AF_INET6;
+ ifm->ifa_prefixlen = prefixlen;
+ ifm->ifa_flags = flags;
+ ifm->ifa_scope = scope;
+ ifm->ifa_index = ifindex;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+ unsigned long tstamp, u32 preferred, u32 valid)
+{
+ struct ifa_cacheinfo ci;
+
+ ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
+ + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
+ + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ ci.ifa_prefered = preferred;
+ ci.ifa_valid = valid;
+
+ return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
}
-/* Maximum length of ifa_cacheinfo attributes */
-#define INET6_IFADDR_RTA_SPACE \
- RTA_SPACE(16) /* IFA_ADDRESS */ + \
- RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */
+static inline int rt_scope(int ifa_scope)
+{
+ if (ifa_scope & IFA_HOST)
+ return RT_SCOPE_HOST;
+ else if (ifa_scope & IFA_LINK)
+ return RT_SCOPE_LINK;
+ else if (ifa_scope & IFA_SITE)
+ return RT_SCOPE_SITE;
+ else
+ return RT_SCOPE_UNIVERSE;
+}
+
+static inline int inet6_ifaddr_msgsize(void)
+{
+ return nlmsg_total_size(sizeof(struct ifaddrmsg) +
+ nla_total_size(16) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)) +
+ 128);
+}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
u32 pid, u32 seq, int event, unsigned int flags)
{
- struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- struct ifa_cacheinfo ci;
- unsigned char *b = skb->tail;
+ u32 preferred, valid;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+ ifa->idev->dev->ifindex);
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
- ifm->ifa_family = AF_INET6;
- ifm->ifa_prefixlen = ifa->prefix_len;
- ifm->ifa_flags = ifa->flags;
- ifm->ifa_scope = RT_SCOPE_UNIVERSE;
- if (ifa->scope&IFA_HOST)
- ifm->ifa_scope = RT_SCOPE_HOST;
- else if (ifa->scope&IFA_LINK)
- ifm->ifa_scope = RT_SCOPE_LINK;
- else if (ifa->scope&IFA_SITE)
- ifm->ifa_scope = RT_SCOPE_SITE;
- ifm->ifa_index = ifa->idev->dev->ifindex;
- RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
if (!(ifa->flags&IFA_F_PERMANENT)) {
- ci.ifa_prefered = ifa->prefered_lft;
- ci.ifa_valid = ifa->valid_lft;
- if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+ preferred = ifa->prefered_lft;
+ valid = ifa->valid_lft;
+ if (preferred != INFINITY_LIFE_TIME) {
long tval = (jiffies - ifa->tstamp)/HZ;
- ci.ifa_prefered -= tval;
- if (ci.ifa_valid != INFINITY_LIFE_TIME)
- ci.ifa_valid -= tval;
+ preferred -= tval;
+ if (valid != INFINITY_LIFE_TIME)
+ valid -= tval;
}
} else {
- ci.ifa_prefered = INFINITY_LIFE_TIME;
- ci.ifa_valid = INFINITY_LIFE_TIME;
- }
- ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ preferred = INFINITY_LIFE_TIME;
+ valid = INFINITY_LIFE_TIME;
+ }
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
+ put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ return nlmsg_cancel(skb, nlh);
+
+ return nlmsg_end(skb, nlh);
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
u32 pid, u32 seq, int event, u16 flags)
{
- struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- struct ifa_cacheinfo ci;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
- ifm->ifa_family = AF_INET6;
- ifm->ifa_prefixlen = 128;
- ifm->ifa_flags = IFA_F_PERMANENT;
- ifm->ifa_scope = RT_SCOPE_UNIVERSE;
- if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
- ifm->ifa_scope = RT_SCOPE_SITE;
- ifm->ifa_index = ifmca->idev->dev->ifindex;
- RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
- ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
- * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
- * 100 / HZ);
- ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
- * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
- * 100 / HZ);
- ci.ifa_prefered = INFINITY_LIFE_TIME;
- ci.ifa_valid = INFINITY_LIFE_TIME;
- RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ int ifindex = ifmca->idev->dev->ifindex;
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ scope = RT_SCOPE_SITE;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+ if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+ put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+ return nlmsg_cancel(skb, nlh);
+
+ return nlmsg_end(skb, nlh);
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
u32 pid, u32 seq, int event, unsigned int flags)
{
- struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- struct ifa_cacheinfo ci;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
- ifm->ifa_family = AF_INET6;
- ifm->ifa_prefixlen = 128;
- ifm->ifa_flags = IFA_F_PERMANENT;
- ifm->ifa_scope = RT_SCOPE_UNIVERSE;
- if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
- ifm->ifa_scope = RT_SCOPE_SITE;
- ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
- RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
- ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
- * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
- * 100 / HZ);
- ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
- * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
- * 100 / HZ);
- ci.ifa_prefered = INFINITY_LIFE_TIME;
- ci.ifa_valid = INFINITY_LIFE_TIME;
- RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ int ifindex = ifaca->aca_idev->dev->ifindex;
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+ scope = RT_SCOPE_SITE;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+ if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+ put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+ return nlmsg_cancel(skb, nlh);
+
+ return nlmsg_end(skb, nlh);
}
enum addr_type_t
@@ -3103,23 +3278,74 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
-static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ void *arg)
{
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *addr = NULL;
+ struct net_device *dev = NULL;
+ struct inet6_ifaddr *ifa;
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
+ int err;
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb) {
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
- return;
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ if (addr == NULL) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_index)
+ dev = __dev_get_by_index(ifm->ifa_index);
+
+ if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+ err = -EADDRNOTAVAIL;
+ goto errout;
}
- if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
+
+ if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
+ err = -ENOBUFS;
+ goto errout_ifa;
+ }
+
+ err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, RTM_NEWADDR, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
- return;
+ goto errout_ifa;
+ }
+
+ err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout_ifa:
+ in6_ifa_put(ifa);
+errout:
+ return err;
+}
+
+static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+{
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
}
static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3154,6 +3380,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
+ array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
}
/* Maximum length of ifinfomsg attributes */
@@ -3260,20 +3487,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE);
+ int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
+ int err = -ENOBUFS;
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb) {
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
- return;
- }
- if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
+ skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
}
/* Maximum length of prefix_cacheinfo attributes */
@@ -3325,33 +3555,40 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE);
+ int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
+ int err = -ENOBUFS;
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb) {
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
- return;
- }
- if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
+ skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
+
+ err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
}
static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
[RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
[RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
[RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, },
+ [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr,
+ .dumpit = inet6_dump_ifaddr, },
[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
[RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
[RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
[RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
[RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
.dumpit = inet6_dump_fib, },
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, },
+#endif
};
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3360,7 +3597,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
- ip6_ins_rt(ifp->rt, NULL, NULL, NULL);
+ ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
break;
@@ -3369,7 +3606,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
dst_hold(&ifp->rt->u.dst);
- if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
+ if (ip6_del_rt(ifp->rt))
dst_free(&ifp->rt->u.dst);
break;
}
@@ -3377,10 +3614,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
- read_lock_bh(&addrconf_lock);
+ rcu_read_lock_bh();
if (likely(ifp->idev->dead == 0))
__ipv6_ifa_notify(event, ifp);
- read_unlock_bh(&addrconf_lock);
+ rcu_read_unlock_bh();
}
#ifdef CONFIG_SYSCTL
@@ -3477,7 +3714,7 @@ static struct addrconf_sysctl_table
ctl_table addrconf_conf_dir[2];
ctl_table addrconf_proto_dir[2];
ctl_table addrconf_root_dir[2];
-} addrconf_sysctl = {
+} addrconf_sysctl __read_mostly = {
.sysctl_header = NULL,
.addrconf_vars = {
{
@@ -3667,6 +3904,14 @@ static struct addrconf_sysctl_table
#endif
#endif
{
+ .ctl_name = NET_IPV6_PROXY_NDP,
+ .procname = "proxy_ndp",
+ .data = &ipv6_devconf.proxy_ndp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = 0, /* sentinel */
}
},
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e19457fe4f6e..e94eccb99707 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -23,7 +23,6 @@
#include <linux/module.h>
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -60,6 +59,9 @@
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
#endif
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -68,7 +70,7 @@ MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
-int sysctl_ipv6_bindv6only;
+int sysctl_ipv6_bindv6only __read_mostly;
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -244,7 +246,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- __u32 v4addr = 0;
+ __be32 v4addr = 0;
unsigned short snum;
int addr_type = 0;
int err = 0;
@@ -638,6 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet->dport;
fl.fl_ip_sport = inet->sport;
+ security_sk_classify_flow(sk, &fl);
if (np->opt && np->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
@@ -659,9 +662,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
return err;
}
- ip6_dst_store(sk, dst, NULL);
- sk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ __ip6_dst_store(sk, dst, NULL, NULL);
}
return 0;
@@ -760,6 +761,8 @@ static int __init inet6_init(void)
struct list_head *r;
int err;
+ BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+
#ifdef MODULE
#if 0 /* FIXME --RR */
if (!mod_member_present(&__this_module, can_unload))
@@ -769,11 +772,6 @@ static int __init inet6_init(void)
#endif
#endif
- if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
- printk(KERN_CRIT "inet6_proto_init: size fault\n");
- return -EINVAL;
- }
-
err = proto_register(&tcpv6_prot, 1);
if (err)
goto out;
@@ -859,6 +857,9 @@ static int __init inet6_init(void)
ipv6_frag_init();
ipv6_nodata_init();
ipv6_destopt_init();
+#ifdef CONFIG_IPV6_MIP6
+ mip6_init();
+#endif
/* Init v6 transport protocols. */
udpv6_init();
@@ -922,6 +923,9 @@ static void __exit inet6_exit(void)
tcp6_proc_exit();
raw6_proc_exit();
#endif
+#ifdef CONFIG_IPV6_MIP6
+ mip6_fini();
+#endif
/* Cleanup code parts. */
sit_cleanup();
ip6_flowlabel_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index d31c0d6c0448..b0d83e8e4252 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -24,7 +24,6 @@
* This file is derived from net/ipv4/ah.c.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/ah.h>
@@ -75,6 +74,66 @@ bad:
return 0;
}
+#ifdef CONFIG_IPV6_MIP6
+/**
+ * ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ * @iph: IPv6 header
+ * @destopt: destionation options header
+ */
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
+{
+ u8 *opt = (u8 *)destopt;
+ int len = ipv6_optlen(destopt);
+ int off = 0;
+ int optlen = 0;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+
+ switch (opt[off]) {
+
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+ default:
+ if (len < 2)
+ goto bad;
+ optlen = opt[off+1]+2;
+ if (len < optlen)
+ goto bad;
+
+ /* Rearrange the source address in @iph and the
+ * addresses in home address option for final source.
+ * See 11.3.2 of RFC 3775 for details.
+ */
+ if (opt[off] == IPV6_TLV_HAO) {
+ struct in6_addr final_addr;
+ struct ipv6_destopt_hao *hao;
+
+ hao = (struct ipv6_destopt_hao *)&opt[off];
+ if (hao->length != sizeof(hao->addr)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
+ goto bad;
+ }
+ ipv6_addr_copy(&final_addr, &hao->addr);
+ ipv6_addr_copy(&hao->addr, &iph->saddr);
+ ipv6_addr_copy(&iph->saddr, &final_addr);
+ }
+ break;
+ }
+
+ off += optlen;
+ len -= optlen;
+ }
+ /* Note: ok if len == 0 */
+bad:
+ return;
+}
+#endif
+
/**
* ipv6_rearrange_rthdr - rearrange IPv6 routing header
* @iph: IPv6 header
@@ -114,7 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
ipv6_addr_copy(&iph->daddr, &final_addr);
}
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
{
union {
struct ipv6hdr *iph;
@@ -129,8 +188,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
while (exthdr.raw < end) {
switch (nexthdr) {
- case NEXTHDR_HOP:
case NEXTHDR_DEST:
+#ifdef CONFIG_IPV6_MIP6
+ if (dir == XFRM_POLICY_OUT)
+ ipv6_rearrange_destopt(iph, exthdr.opth);
+#endif
+ case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
LIMIT_NETDEBUG(
KERN_WARNING "overrun %sopts\n",
@@ -165,6 +228,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
u8 nexthdr;
char tmp_base[8];
struct {
+#ifdef CONFIG_IPV6_MIP6
+ struct in6_addr saddr;
+#endif
struct in6_addr daddr;
char hdrs[0];
} *tmp_ext;
@@ -189,10 +255,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
err = -ENOMEM;
goto error;
}
+#ifdef CONFIG_IPV6_MIP6
+ memcpy(tmp_ext, &top_iph->saddr, extlen);
+#else
memcpy(tmp_ext, &top_iph->daddr, extlen);
+#endif
err = ipv6_clear_mutable_options(top_iph,
extlen - sizeof(*tmp_ext) +
- sizeof(*top_iph));
+ sizeof(*top_iph),
+ XFRM_POLICY_OUT);
if (err)
goto error_free_iph;
}
@@ -214,13 +285,20 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(++x->replay.oseq);
xfrm_aevent_doreplay(x);
- ahp->icv(ahp, skb, ah->auth_data);
+ err = ah_mac_digest(ahp, skb, ah->auth_data);
+ if (err)
+ goto error_free_iph;
+ memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
err = 0;
memcpy(top_iph, tmp_base, sizeof(tmp_base));
if (tmp_ext) {
+#ifdef CONFIG_IPV6_MIP6
+ memcpy(&top_iph->saddr, tmp_ext, extlen);
+#else
memcpy(&top_iph->daddr, tmp_ext, extlen);
+#endif
error_free_iph:
kfree(tmp_ext);
}
@@ -252,6 +330,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
u16 hdr_len;
u16 ah_hlen;
int nexthdr;
+ int err = -EINVAL;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -279,7 +358,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!tmp_hdr)
goto out;
memcpy(tmp_hdr, skb->nh.raw, hdr_len);
- if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
+ if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
goto free_out;
skb->nh.ipv6h->priority = 0;
skb->nh.ipv6h->flow_lbl[0] = 0;
@@ -293,8 +372,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
skb_push(skb, hdr_len);
- ahp->icv(ahp, skb, ah->auth_data);
- if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+ err = ah_mac_digest(ahp, skb, ah->auth_data);
+ if (err)
+ goto free_out;
+ err = -EINVAL;
+ if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
x->stats.integrity_failed++;
goto free_out;
@@ -311,7 +393,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
free_out:
kfree(tmp_hdr);
out:
- return -EINVAL;
+ return err;
}
static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -339,6 +421,7 @@ static int ah6_init_state(struct xfrm_state *x)
{
struct ah_data *ahp = NULL;
struct xfrm_algo_desc *aalg_desc;
+ struct crypto_hash *tfm;
if (!x->aalg)
goto error;
@@ -356,24 +439,27 @@ static int ah6_init_state(struct xfrm_state *x)
ahp->key = x->aalg->alg_key;
ahp->key_len = (x->aalg->alg_key_len+7)/8;
- ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
- if (!ahp->tfm)
+ tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ goto error;
+
+ ahp->tfm = tfm;
+ if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
goto error;
- ahp->icv = ah_hmac_digest;
/*
* Lookup the algorithm description maintained by xfrm_algo,
* verify crypto transform properties, and store information
* we need for AH processing. This lookup cannot fail here
- * after a successful crypto_alloc_tfm().
+ * after a successful crypto_alloc_hash().
*/
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_tfm_alg_digestsize(ahp->tfm)) {
+ crypto_hash_digestsize(tfm)) {
printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
- x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+ x->aalg->alg_name, crypto_hash_digestsize(tfm),
aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
@@ -388,7 +474,7 @@ static int ah6_init_state(struct xfrm_state *x)
goto error;
x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct ipv6hdr);
x->data = ahp;
@@ -397,7 +483,7 @@ static int ah6_init_state(struct xfrm_state *x)
error:
if (ahp) {
kfree(ahp->work_icv);
- crypto_free_tfm(ahp->tfm);
+ crypto_free_hash(ahp->tfm);
kfree(ahp);
}
return -EINVAL;
@@ -412,7 +498,7 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp->work_icv);
ahp->work_icv = NULL;
- crypto_free_tfm(ahp->tfm);
+ crypto_free_hash(ahp->tfm);
ahp->tfm = NULL;
kfree(ahp);
}
@@ -425,7 +511,8 @@ static struct xfrm_type ah6_type =
.init_state = ah6_init_state,
.destructor = ah6_destroy,
.input = ah6_input,
- .output = ah6_output
+ .output = ah6_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
};
static struct inet6_protocol ah6_protocol = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 39ec528923f6..a9604764e015 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -14,7 +14,6 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -57,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
int onlink;
onlink = 0;
- read_lock(&addrconf_lock);
+ rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
read_lock_bh(&idev->lock);
@@ -69,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
}
read_unlock_bh(&idev->lock);
}
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
return onlink;
}
@@ -336,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
write_unlock_bh(&idev->lock);
dst_hold(&rt->u.dst);
- if (ip6_ins_rt(rt, NULL, NULL, NULL))
+ if (ip6_ins_rt(rt))
dst_release(&rt->u.dst);
addrconf_join_solict(dev, &aca->aca_addr);
@@ -379,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
addrconf_leave_solict(idev, &aca->aca_addr);
dst_hold(&aca->aca_rt->u.dst);
- if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
+ if (ip6_del_rt(aca->aca_rt))
dst_free(&aca->aca_rt->u.dst);
else
dst_release(&aca->aca_rt->u.dst);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 99a6eb23378b..7206747022fc 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -156,6 +156,8 @@ ipv4_connected:
if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
fl.oif = np->mcast_oif;
+ security_sk_classify_flow(sk, &fl);
+
if (flowlabel) {
if (flowlabel->opt && flowlabel->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
@@ -191,7 +193,12 @@ ipv4_connected:
ip6_dst_store(sk, dst,
ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
- &np->daddr : NULL);
+ &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
sk->sk_state = TCP_ESTABLISHED;
out:
@@ -641,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
- /*
- * TYPE 0
- */
- if (rthdr->type) {
+ switch (rthdr->type) {
+ case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+#endif
+ break;
+ default:
err = -EINVAL;
goto exit_f;
}
@@ -696,7 +706,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
}
tc = *(int *)CMSG_DATA(cmsg);
- if (tc < 0 || tc > 0xff)
+ if (tc < -1 || tc > 0xff)
goto exit_f;
err = 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a15a6f320f70..e78680a9985b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -24,7 +24,7 @@
* This file is derived from net/ipv4/esp.c
*/
-#include <linux/config.h>
+#include <linux/err.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -45,7 +45,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
struct ipv6hdr *top_iph;
struct ipv6_esp_hdr *esph;
- struct crypto_tfm *tfm;
+ struct crypto_blkcipher *tfm;
+ struct blkcipher_desc desc;
struct esp_data *esp;
struct sk_buff *trailer;
int blksize;
@@ -68,7 +69,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
alen = esp->auth.icv_trunc_len;
tfm = esp->conf.tfm;
- blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+ desc.tfm = tfm;
+ desc.flags = 0;
+ blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
clen = ALIGN(clen + 2, blksize);
if (esp->conf.padlen)
clen = ALIGN(clen, esp->conf.padlen);
@@ -96,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esph->seq_no = htonl(++x->replay.oseq);
xfrm_aevent_doreplay(x);
- if (esp->conf.ivlen)
- crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+ if (esp->conf.ivlen) {
+ if (unlikely(!esp->conf.ivinitted)) {
+ get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 1;
+ }
+ crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+ }
do {
struct scatterlist *sg = &esp->sgbuf[0];
@@ -108,24 +116,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
goto error;
}
skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
- crypto_cipher_encrypt(tfm, sg, sg, clen);
+ err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
} while (0);
+ if (unlikely(err))
+ goto error;
+
if (esp->conf.ivlen) {
- memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
- crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+ memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+ crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
}
if (esp->auth.icv_full_len) {
- esp->auth.icv(esp, skb, (u8*)esph-skb->data,
- sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
- pskb_put(skb, trailer, alen);
+ err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+ sizeof(*esph) + esp->conf.ivlen + clen);
+ memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
}
- err = 0;
-
error:
return err;
}
@@ -135,8 +144,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *iph;
struct ipv6_esp_hdr *esph;
struct esp_data *esp = x->data;
+ struct crypto_blkcipher *tfm = esp->conf.tfm;
+ struct blkcipher_desc desc = { .tfm = tfm };
struct sk_buff *trailer;
- int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
@@ -156,15 +167,16 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
/* If integrity check is required, do this. */
if (esp->auth.icv_full_len) {
- u8 sum[esp->auth.icv_full_len];
- u8 sum1[alen];
+ u8 sum[alen];
- esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+ ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
+ if (ret)
+ goto out;
- if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+ if (skb_copy_bits(skb, skb->len - alen, sum, alen))
BUG();
- if (unlikely(memcmp(sum, sum1, alen))) {
+ if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
x->stats.integrity_failed++;
ret = -EINVAL;
goto out;
@@ -183,7 +195,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
/* Get ivec. This can be wrong, check against another impls. */
if (esp->conf.ivlen)
- crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+ crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
{
u8 nexthdr[2];
@@ -198,9 +210,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
}
}
skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
- crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+ ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
+ if (unlikely(ret))
+ goto out;
if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
BUG();
@@ -226,9 +240,9 @@ out:
static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
@@ -267,11 +281,11 @@ static void esp6_destroy(struct xfrm_state *x)
if (!esp)
return;
- crypto_free_tfm(esp->conf.tfm);
+ crypto_free_blkcipher(esp->conf.tfm);
esp->conf.tfm = NULL;
kfree(esp->conf.ivec);
esp->conf.ivec = NULL;
- crypto_free_tfm(esp->auth.tfm);
+ crypto_free_hash(esp->auth.tfm);
esp->auth.tfm = NULL;
kfree(esp->auth.work_icv);
esp->auth.work_icv = NULL;
@@ -281,6 +295,7 @@ static void esp6_destroy(struct xfrm_state *x)
static int esp6_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
+ struct crypto_blkcipher *tfm;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -299,24 +314,29 @@ static int esp6_init_state(struct xfrm_state *x)
if (x->aalg) {
struct xfrm_algo_desc *aalg_desc;
+ struct crypto_hash *hash;
esp->auth.key = x->aalg->alg_key;
esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
- esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
- if (esp->auth.tfm == NULL)
+ hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hash))
+ goto error;
+
+ esp->auth.tfm = hash;
+ if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
goto error;
- esp->auth.icv = esp_hmac_digest;
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_tfm_alg_digestsize(esp->auth.tfm)) {
- printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
- x->aalg->alg_name,
- crypto_tfm_alg_digestsize(esp->auth.tfm),
- aalg_desc->uinfo.auth.icv_fullbits/8);
- goto error;
+ crypto_hash_digestsize(hash)) {
+ NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_hash_digestsize(hash),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
+ goto error;
}
esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
@@ -328,24 +348,22 @@ static int esp6_init_state(struct xfrm_state *x)
}
esp->conf.key = x->ealg->alg_key;
esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
- if (x->props.ealgo == SADB_EALG_NULL)
- esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
- else
- esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
- if (esp->conf.tfm == NULL)
+ tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
goto error;
- esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+ esp->conf.tfm = tfm;
+ esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
esp->conf.padlen = 0;
if (esp->conf.ivlen) {
esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
if (unlikely(esp->conf.ivec == NULL))
goto error;
- get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 0;
}
- if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+ if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
goto error;
x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct ipv6hdr);
x->data = esp;
return 0;
@@ -366,7 +384,8 @@ static struct xfrm_type esp6_type =
.destructor = esp6_destroy,
.get_max_size = esp6_get_max_size,
.input = esp6_input,
- .output = esp6_output
+ .output = esp6_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
};
static struct inet6_protocol esp6_protocol = {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a18d4256372c..88c96b10684c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,9 +43,54 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/xfrm.h>
+#endif
#include <asm/uaccess.h>
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+ int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *hdr;
+ int len;
+
+ if (offset + 2 > packet_len)
+ goto bad;
+ hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ len = ((hdr->hdrlen + 1) << 3);
+
+ if (offset + len > packet_len)
+ goto bad;
+
+ offset += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int opttype = skb->nh.raw[offset];
+ int optlen;
+
+ if (opttype == type)
+ return offset;
+
+ switch (opttype) {
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+ default:
+ optlen = skb->nh.raw[offset + 1] + 2;
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ offset += optlen;
+ len -= optlen;
+ }
+ /* not_found */
+ bad:
+ return -1;
+}
+
/*
* Parsing tlv encoded headers.
*
@@ -56,7 +101,7 @@
struct tlvtype_proc {
int type;
- int (*func)(struct sk_buff *skb, int offset);
+ int (*func)(struct sk_buff **skbp, int offset);
};
/*********************
@@ -65,8 +110,10 @@ struct tlvtype_proc {
/* An unknown option is detected, decide what to do */
-static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
{
+ struct sk_buff *skb = *skbp;
+
switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return 1;
@@ -91,8 +138,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
{
+ struct sk_buff *skb = *skbp;
struct tlvtype_proc *curr;
int off = skb->h.raw - skb->nh.raw;
int len = ((skb->h.raw[1]+1)<<3);
@@ -122,13 +170,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
/* type specific length/alignment
checks will be performed in the
func(). */
- if (curr->func(skb, off) == 0)
+ if (curr->func(skbp, off) == 0)
return 0;
break;
}
}
if (curr->type < 0) {
- if (ip6_tlvopt_unknown(skb, off) == 0)
+ if (ip6_tlvopt_unknown(skbp, off) == 0)
return 0;
}
break;
@@ -147,8 +195,85 @@ bad:
Destination options header.
*****************************/
+#ifdef CONFIG_IPV6_MIP6
+static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+{
+ struct sk_buff *skb = *skbp;
+ struct ipv6_destopt_hao *hao;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+ struct in6_addr tmp_addr;
+ int ret;
+
+ if (opt->dsthao) {
+ LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+ goto discard;
+ }
+ opt->dsthao = opt->dst1;
+ opt->dst1 = 0;
+
+ hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+
+ if (hao->length != 16) {
+ LIMIT_NETDEBUG(
+ KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+ goto discard;
+ }
+
+ if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
+ LIMIT_NETDEBUG(
+ KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
+ goto discard;
+ }
+
+ ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
+ (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
+ if (unlikely(ret < 0))
+ goto discard;
+
+ if (skb_cloned(skb)) {
+ struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+ struct inet6_skb_parm *opt2;
+
+ if (skb2 == NULL)
+ goto discard;
+
+ opt2 = IP6CB(skb2);
+ memcpy(opt2, opt, sizeof(*opt2));
+
+ kfree_skb(skb);
+
+ /* update all variable using below by copied skbuff */
+ *skbp = skb = skb2;
+ hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
+ ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+ }
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
+ ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
+ ipv6_addr_copy(&hao->addr, &tmp_addr);
+
+ if (skb->tstamp.off_sec == 0)
+ __net_timestamp(skb);
+
+ return 1;
+
+ discard:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
static struct tlvtype_proc tlvprocdestopt_lst[] = {
- /* No destination options are defined now */
+#ifdef CONFIG_IPV6_MIP6
+ {
+ .type = IPV6_TLV_HAO,
+ .func = ipv6_dest_hao,
+ },
+#endif
{-1, NULL}
};
@@ -156,6 +281,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
+#ifdef CONFIG_IPV6_MIP6
+ __u16 dstbuf;
+#endif
if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
!pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
@@ -166,10 +294,19 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
opt->lastopt = skb->h.raw - skb->nh.raw;
opt->dst1 = skb->h.raw - skb->nh.raw;
+#ifdef CONFIG_IPV6_MIP6
+ dstbuf = opt->dst1;
+#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+ skb = *skbp;
skb->h.raw += ((skb->h.raw[1]+1)<<3);
+ opt = IP6CB(skb);
+#ifdef CONFIG_IPV6_MIP6
+ opt->nhoff = dstbuf;
+#else
opt->nhoff = opt->dst1;
+#endif
return 1;
}
@@ -179,7 +316,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
static struct inet6_protocol destopt_protocol = {
.handler = ipv6_destopt_rcv,
- .flags = INET6_PROTO_NOPOLICY,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
};
void __init ipv6_destopt_init(void)
@@ -219,7 +356,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
- struct in6_addr *addr;
+ struct in6_addr *addr = NULL;
struct in6_addr daddr;
int n, i;
@@ -244,6 +381,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
looped_back:
if (hdr->segments_left == 0) {
+ switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+ /* Silently discard type 2 header unless it was
+ * processed by own
+ */
+ if (!addr) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
opt->lastopt = skb->h.raw - skb->nh.raw;
opt->srcrt = skb->h.raw - skb->nh.raw;
skb->h.raw += (hdr->hdrlen + 1) << 3;
@@ -253,17 +407,29 @@ looped_back:
return 1;
}
- if (hdr->type != IPV6_SRCRT_TYPE_0) {
+ switch (hdr->type) {
+ case IPV6_SRCRT_TYPE_0:
+ if (hdr->hdrlen & 0x01) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+ return -1;
+ }
+ break;
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+ /* Silently discard invalid RTH type 2 */
+ if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
return -1;
}
-
- if (hdr->hdrlen & 0x01) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
- return -1;
- }
/*
* This is the routing header forwarding algorithm from
@@ -294,7 +460,7 @@ looped_back:
hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
}
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
i = n - --hdr->segments_left;
@@ -303,6 +469,27 @@ looped_back:
addr = rthdr->addr;
addr += i - 1;
+ switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+ if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+ (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ IPPROTO_ROUTING) < 0) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ if (!ipv6_chk_home_addr(addr)) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
if (ipv6_addr_is_multicast(addr)) {
IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
@@ -340,7 +527,7 @@ looped_back:
static struct inet6_protocol rthdr_protocol = {
.handler = ipv6_rthdr_rcv,
- .flags = INET6_PROTO_NOPOLICY,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
};
void __init ipv6_rthdr_init(void)
@@ -421,8 +608,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
/* Router Alert as of RFC 2711 */
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
{
+ struct sk_buff *skb = *skbp;
+
if (skb->nh.raw[optoff+1] == 2) {
IP6CB(skb)->ra = optoff;
return 1;
@@ -435,8 +624,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
/* Jumbo payload */
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
{
+ struct sk_buff *skb = *skbp;
u32 pkt_len;
if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
@@ -485,8 +675,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
{ -1, }
};
-int ipv6_parse_hopopts(struct sk_buff *skb)
+int ipv6_parse_hopopts(struct sk_buff **skbp)
{
+ struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
/*
@@ -502,8 +693,10 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
}
opt->hop = sizeof(struct ipv6hdr);
- if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+ if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
+ skb = *skbp;
skb->h.raw += (skb->h.raw[1]+1)<<3;
+ opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
return 1;
}
@@ -635,14 +828,17 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
struct ipv6_txoptions *opt2;
int err;
- if (newtype != IPV6_HOPOPTS && opt->hopopt)
- tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
- if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
- tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
- if (newtype != IPV6_RTHDR && opt->srcrt)
- tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
- if (newtype != IPV6_DSTOPTS && opt->dst1opt)
- tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+ if (opt) {
+ if (newtype != IPV6_HOPOPTS && opt->hopopt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
+ if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
+ if (newtype != IPV6_RTHDR && opt->srcrt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
+ if (newtype != IPV6_DSTOPTS && opt->dst1opt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+ }
+
if (newopt && newoptlen)
tot_len += CMSG_ALIGN(newoptlen);
@@ -659,25 +855,25 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
- err = ipv6_renew_option(opt->hopopt, newopt, newoptlen,
+ err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
newtype != IPV6_HOPOPTS,
&opt2->hopopt, &p);
if (err)
goto out;
- err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen,
+ err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
newtype != IPV6_RTHDRDSTOPTS,
&opt2->dst0opt, &p);
if (err)
goto out;
- err = ipv6_renew_option(opt->srcrt, newopt, newoptlen,
+ err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
newtype != IPV6_RTHDR,
- (struct ipv6_opt_hdr **)opt2->srcrt, &p);
+ (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
if (err)
goto out;
- err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen,
+ err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
newtype != IPV6_DSTOPTS,
&opt2->dst1opt, &p);
if (err)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 000000000000..34f5bfaddfc2
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,305 @@
+/*
+ * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ * Thomas Graf <tgraf@suug.ch>
+ * Ville Nuorvala <vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule
+{
+ struct fib_rule common;
+ struct rt6key src;
+ struct rt6key dst;
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ u32 fwmark;
+ u32 fwmask;
+#endif
+ u8 tclass;
+};
+
+static struct fib_rules_ops fib6_rules_ops;
+
+static struct fib6_rule main_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7FFE,
+ .action = FR_ACT_TO_TBL,
+ .table = RT6_TABLE_MAIN,
+ },
+};
+
+static struct fib6_rule local_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0,
+ .action = FR_ACT_TO_TBL,
+ .table = RT6_TABLE_LOCAL,
+ .flags = FIB_RULE_PERMANENT,
+ },
+};
+
+static LIST_HEAD(fib6_rules);
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+ pol_lookup_t lookup)
+{
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = lookup,
+ };
+
+ fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
+ if (arg.rule)
+ fib_rule_put(arg.rule);
+
+ if (arg.result)
+ return (struct dst_entry *) arg.result;
+
+ dst_hold(&ip6_null_entry.u.dst);
+ return &ip6_null_entry.u.dst;
+}
+
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
+{
+ struct rt6_info *rt = NULL;
+ struct fib6_table *table;
+ pol_lookup_t lookup = arg->lookup_ptr;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+ case FR_ACT_UNREACHABLE:
+ rt = &ip6_null_entry;
+ goto discard_pkt;
+ default:
+ case FR_ACT_BLACKHOLE:
+ rt = &ip6_blk_hole_entry;
+ goto discard_pkt;
+ case FR_ACT_PROHIBIT:
+ rt = &ip6_prohibit_entry;
+ goto discard_pkt;
+ }
+
+ table = fib6_get_table(rule->table);
+ if (table)
+ rt = lookup(table, flp, flags);
+
+ if (rt != &ip6_null_entry)
+ goto out;
+ dst_release(&rt->u.dst);
+ rt = NULL;
+ goto out;
+
+discard_pkt:
+ dst_hold(&rt->u.dst);
+out:
+ arg->result = rt;
+ return rt == NULL ? -EAGAIN : 0;
+}
+
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+ struct fib6_rule *r = (struct fib6_rule *) rule;
+
+ if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+ return 0;
+
+ if ((flags & RT6_LOOKUP_F_HAS_SADDR) &&
+ !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+ return 0;
+
+ if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
+ return 0;
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask)
+ return 0;
+#endif
+
+ return 1;
+}
+
+static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
+ [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+ [FRA_PRIORITY] = { .type = NLA_U32 },
+ [FRA_SRC] = { .len = sizeof(struct in6_addr) },
+ [FRA_DST] = { .len = sizeof(struct in6_addr) },
+ [FRA_FWMARK] = { .type = NLA_U32 },
+ [FRA_FWMASK] = { .type = NLA_U32 },
+ [FRA_TABLE] = { .type = NLA_U32 },
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ int err = -EINVAL;
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ if (frh->src_len > 128 || frh->dst_len > 128 ||
+ (frh->tos & ~IPV6_FLOWINFO_MASK))
+ goto errout;
+
+ if (rule->action == FR_ACT_TO_TBL) {
+ if (rule->table == RT6_TABLE_UNSPEC)
+ goto errout;
+
+ if (fib6_new_table(rule->table) == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+ }
+
+ if (tb[FRA_SRC])
+ nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+ sizeof(struct in6_addr));
+
+ if (tb[FRA_DST])
+ nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+ sizeof(struct in6_addr));
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ if (tb[FRA_FWMARK]) {
+ rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+ if (rule6->fwmark) {
+ /*
+ * if the mark value is non-zero,
+ * all bits are compared by default
+ * unless a mask is explicitly specified.
+ */
+ rule6->fwmask = 0xFFFFFFFF;
+ }
+ }
+
+ if (tb[FRA_FWMASK])
+ rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
+ rule6->src.plen = frh->src_len;
+ rule6->dst.plen = frh->dst_len;
+ rule6->tclass = frh->tos;
+
+ err = 0;
+errout:
+ return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ if (frh->src_len && (rule6->src.plen != frh->src_len))
+ return 0;
+
+ if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+ return 0;
+
+ if (frh->tos && (rule6->tclass != frh->tos))
+ return 0;
+
+ if (tb[FRA_SRC] &&
+ nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+ return 0;
+
+ if (tb[FRA_DST] &&
+ nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+ return 0;
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+ return 0;
+
+ if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+ return 0;
+#endif
+
+ return 1;
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ frh->family = AF_INET6;
+ frh->dst_len = rule6->dst.plen;
+ frh->src_len = rule6->src.plen;
+ frh->tos = rule6->tclass;
+
+ if (rule6->dst.plen)
+ NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
+ &rule6->dst.addr);
+
+ if (rule6->src.plen)
+ NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
+ &rule6->src.addr);
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ if (rule6->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
+
+ if (rule6->fwmask || rule6->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
+#endif
+
+ return 0;
+
+nla_put_failure:
+ return -ENOBUFS;
+}
+
+int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return fib_rules_dump(skb, cb, AF_INET6);
+}
+
+static u32 fib6_rule_default_pref(void)
+{
+ return 0x3FFF;
+}
+
+static struct fib_rules_ops fib6_rules_ops = {
+ .family = AF_INET6,
+ .rule_size = sizeof(struct fib6_rule),
+ .action = fib6_rule_action,
+ .match = fib6_rule_match,
+ .configure = fib6_rule_configure,
+ .compare = fib6_rule_compare,
+ .fill = fib6_rule_fill,
+ .default_pref = fib6_rule_default_pref,
+ .nlgroup = RTNLGRP_IPV6_RULE,
+ .policy = fib6_rule_policy,
+ .rules_list = &fib6_rules,
+ .owner = THIS_MODULE,
+};
+
+void __init fib6_rules_init(void)
+{
+ list_add_tail(&local_rule.common.list, &fib6_rules);
+ list_add_tail(&main_rule.common.list, &fib6_rules);
+
+ fib_rules_register(&fib6_rules_ops);
+}
+
+void fib6_rules_cleanup(void)
+{
+ fib_rules_unregister(&fib6_rules_ops);
+}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1044b6fce0d5..4ec876066b3f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb)
return 0;
}
-static int sysctl_icmpv6_time = 1*HZ;
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
/*
* Check the ICMP output rate limit
@@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
return 0;
}
+#ifdef CONFIG_IPV6_MIP6
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct ipv6_destopt_hao *hao;
+ struct in6_addr tmp;
+ int off;
+
+ if (opt->dsthao) {
+ off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+ if (likely(off >= 0)) {
+ hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+ ipv6_addr_copy(&tmp, &iph->saddr);
+ ipv6_addr_copy(&iph->saddr, &hao->addr);
+ ipv6_addr_copy(&hao->addr, &tmp);
+ }
+ }
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
/*
* Send an ICMP message in response to a packet in error
*/
@@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
return;
}
+ mip6_addr_swap(skb);
+
memset(&fl, 0, sizeof(fl));
fl.proto = IPPROTO_ICMPV6;
ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
@@ -358,6 +383,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
fl.oif = iif;
fl.fl_icmp_type = type;
fl.fl_icmp_code = code;
+ security_skb_classify_flow(skb, &fl);
if (icmpv6_xmit_lock())
return;
@@ -401,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
- tclass = np->cork.tclass;
+ tclass = np->tclass;
if (tclass < 0)
tclass = 0;
@@ -472,6 +498,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipv6_addr_copy(&fl.fl6_src, saddr);
fl.oif = skb->dev->ifindex;
fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+ security_skb_classify_flow(skb, &fl);
if (icmpv6_xmit_lock())
return;
@@ -497,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
- tclass = np->cork.tclass;
+ tclass = np->tclass;
if (tclass < 0)
tclass = 0;
@@ -604,7 +631,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
/* Perform checksum. */
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
skb->csum))
break;
@@ -712,6 +739,11 @@ discard_it:
return 0;
}
+/*
+ * Special lock-class for __icmpv6_socket:
+ */
+static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
+
int __init icmpv6_init(struct net_proto_family *ops)
{
struct sock *sk;
@@ -730,6 +762,14 @@ int __init icmpv6_init(struct net_proto_family *ops)
sk = per_cpu(__icmpv6_socket, i)->sk;
sk->sk_allocation = GFP_ATOMIC;
+ /*
+ * Split off their lock-class, because sk->sk_dst_lock
+ * gets used from softirqs, which is safe for
+ * __icmpv6_socket (because those never get directly used
+ * via userspace syscalls), but unsafe for normal sockets.
+ */
+ lockdep_set_class(&sk->sk_dst_lock,
+ &icmpv6_socket_sk_dst_lock_key);
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index eb2865d5ae28..827f41d1478b 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -13,7 +13,6 @@
* 2 of the License, or(at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
@@ -158,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_sport = inet->sport;
fl.fl_ip_dport = inet->dport;
+ security_sk_classify_flow(sk, &fl);
if (np->opt && np->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -186,9 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
return err;
}
- ip6_dst_store(sk, dst, NULL);
- sk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ __ip6_dst_store(sk, dst, NULL, NULL);
}
skb->dst = dst_clone(dst);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2ae84c961678..8accd1fbeeda 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -14,7 +14,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -65,7 +64,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
{
struct sock *sk;
const struct hlist_node *node;
- const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
@@ -83,7 +82,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
const struct inet_timewait_sock *tw = inet_twsk(sk);
- if(*((__u32 *)&(tw->tw_dport)) == ports &&
+ if(*((__portpair *)&(tw->tw_dport)) == ports &&
sk->sk_family == PF_INET6) {
const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
@@ -172,7 +171,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *daddr = &np->rcv_saddr;
const struct in6_addr *saddr = &np->daddr;
const int dif = sk->sk_bound_dev_if;
- const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+ const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -189,7 +188,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
tw = inet_twsk(sk2);
- if(*((__u32 *)&(tw->tw_dport)) == ports &&
+ if(*((__portpair *)&(tw->tw_dport)) == ports &&
sk2->sk_family == PF_INET6 &&
ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2cb6149349bf..8fcae7a6510b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -18,8 +18,8 @@
* Yuji SEKIYA @USAGI: Support default route on router node;
* remove ip6_null_entry from the top of
* routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
@@ -27,6 +27,7 @@
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
+#include <linux/list.h>
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
@@ -69,19 +70,19 @@ struct fib6_cleaner_t
void *arg;
};
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
#ifdef CONFIG_IPV6_SUBTREES
#define FWS_INIT FWS_S
-#define SUBTREE(fn) ((fn)->subtree)
#else
#define FWS_INIT FWS_L
-#define SUBTREE(fn) NULL
#endif
static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn);
static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
/*
* A routing update causes an increase of the serial number on the
@@ -94,13 +95,31 @@ static __u32 rt_sernum;
static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
-struct fib6_walker_t fib6_walker_list = {
+static struct fib6_walker_t fib6_walker_list = {
.prev = &fib6_walker_list,
.next = &fib6_walker_list,
};
#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+ write_lock_bh(&fib6_walker_lock);
+ w->next = fib6_walker_list.next;
+ w->prev = &fib6_walker_list;
+ w->next->prev = w;
+ w->prev->next = w;
+ write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+ write_lock_bh(&fib6_walker_lock);
+ w->next->prev = w->prev;
+ w->prev->next = w->next;
+ w->prev = w->next = w;
+ write_unlock_bh(&fib6_walker_lock);
+}
static __inline__ u32 fib6_new_sernum(void)
{
u32 n = ++rt_sernum;
@@ -148,6 +167,253 @@ static __inline__ void rt6_release(struct rt6_info *rt)
dst_free(&rt->u.dst);
}
+static struct fib6_table fib6_main_tbl = {
+ .tb6_id = RT6_TABLE_MAIN,
+ .tb6_lock = RW_LOCK_UNLOCKED,
+ .tb6_root = {
+ .leaf = &ip6_null_entry,
+ .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+ },
+};
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB_TABLE_HASHSZ 256
+#else
+#define FIB_TABLE_HASHSZ 1
+#endif
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+static void fib6_link_table(struct fib6_table *tb)
+{
+ unsigned int h;
+
+ h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
+
+ /*
+ * No protection necessary, this is the only list mutatation
+ * operation, tables never disappear once they exist.
+ */
+ hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+static struct fib6_table fib6_local_tbl = {
+ .tb6_id = RT6_TABLE_LOCAL,
+ .tb6_lock = RW_LOCK_UNLOCKED,
+ .tb6_root = {
+ .leaf = &ip6_null_entry,
+ .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+ },
+};
+
+static struct fib6_table *fib6_alloc_table(u32 id)
+{
+ struct fib6_table *table;
+
+ table = kzalloc(sizeof(*table), GFP_ATOMIC);
+ if (table != NULL) {
+ table->tb6_id = id;
+ table->tb6_lock = RW_LOCK_UNLOCKED;
+ table->tb6_root.leaf = &ip6_null_entry;
+ table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ }
+
+ return table;
+}
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+ struct fib6_table *tb;
+
+ if (id == 0)
+ id = RT6_TABLE_MAIN;
+ tb = fib6_get_table(id);
+ if (tb)
+ return tb;
+
+ tb = fib6_alloc_table(id);
+ if (tb != NULL)
+ fib6_link_table(tb);
+
+ return tb;
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+ struct fib6_table *tb;
+ struct hlist_node *node;
+ unsigned int h;
+
+ if (id == 0)
+ id = RT6_TABLE_MAIN;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
+ if (tb->tb6_id == id) {
+ rcu_read_unlock();
+ return tb;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void __init fib6_tables_init(void)
+{
+ fib6_link_table(&fib6_main_tbl);
+ fib6_link_table(&fib6_local_tbl);
+}
+
+#else
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+ return fib6_get_table(id);
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+ return &fib6_main_tbl;
+}
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+ pol_lookup_t lookup)
+{
+ return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+}
+
+static void __init fib6_tables_init(void)
+{
+ fib6_link_table(&fib6_main_tbl);
+}
+
+#endif
+
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->u.next) {
+ res = rt6_dump_route(rt, w->args);
+ if (res < 0) {
+ /* Frame is full, suspend walking */
+ w->leaf = rt;
+ return 1;
+ }
+ BUG_TRAP(res!=0);
+ }
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+ struct fib6_walker_t *w = (void*)cb->args[2];
+
+ if (w) {
+ cb->args[2] = 0;
+ kfree(w);
+ }
+ cb->done = (void*)cb->args[3];
+ cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+ fib6_dump_end(cb);
+ return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct fib6_walker_t *w;
+ int res;
+
+ w = (void *)cb->args[2];
+ w->root = &table->tb6_root;
+
+ if (cb->args[4] == 0) {
+ read_lock_bh(&table->tb6_lock);
+ res = fib6_walk(w);
+ read_unlock_bh(&table->tb6_lock);
+ if (res > 0)
+ cb->args[4] = 1;
+ } else {
+ read_lock_bh(&table->tb6_lock);
+ res = fib6_walk_continue(w);
+ read_unlock_bh(&table->tb6_lock);
+ if (res != 0) {
+ if (res < 0)
+ fib6_walker_unlink(w);
+ goto end;
+ }
+ fib6_walker_unlink(w);
+ cb->args[4] = 0;
+ }
+end:
+ return res;
+}
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct rt6_rtnl_dump_arg arg;
+ struct fib6_walker_t *w;
+ struct fib6_table *tb;
+ struct hlist_node *node;
+ int res = 0;
+
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+ w = (void *)cb->args[2];
+ if (w == NULL) {
+ /* New dump:
+ *
+ * 1. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
+ /*
+ * 2. allocate and initialize walker.
+ */
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (w == NULL)
+ return -ENOMEM;
+ w->func = fib6_dump_node;
+ cb->args[2] = (long)w;
+ }
+
+ arg.skb = skb;
+ arg.cb = cb;
+ w->args = &arg;
+
+ for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+ e = 0;
+ hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
+ if (e < s_e)
+ goto next;
+ res = fib6_dump_table(tb, skb, cb);
+ if (res != 0)
+ goto out;
+next:
+ e++;
+ }
+ }
+out:
+ cb->args[1] = e;
+ cb->args[0] = h;
+
+ res = res < 0 ? res : skb->len;
+ if (res <= 0)
+ fib6_dump_end(cb);
+ return res;
+}
/*
* Routing Table
@@ -344,7 +610,7 @@ insert_above:
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nlmsghdr *nlh, struct netlink_skb_parms *req)
+ struct nl_info *info)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -399,7 +665,7 @@ out:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
rt6_stats.fib_rt_entries++;
if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -429,10 +695,9 @@ void fib6_force_start_gc(void)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
@@ -441,6 +706,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (fn == NULL)
goto out;
+ pn = fn;
+
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
@@ -486,10 +753,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
/* Now link new subtree to main tree */
sfn->parent = fn;
fn->subtree = sfn;
- if (fn->leaf == NULL) {
- fn->leaf = rt;
- atomic_inc(&rt->rt6i_ref);
- }
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
sizeof(struct in6_addr), rt->rt6i_src.plen,
@@ -499,21 +762,42 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
goto st_failure;
}
+ if (fn->leaf == NULL) {
+ fn->leaf = rt;
+ atomic_inc(&rt->rt6i_ref);
+ }
fn = sn;
}
#endif
- err = fib6_add_rt2node(fn, rt, nlh, req);
+ err = fib6_add_rt2node(fn, rt, info);
if (err == 0) {
fib6_start_gc(rt);
if (!(rt->rt6i_flags&RTF_CACHE))
- fib6_prune_clones(fn, rt);
+ fib6_prune_clones(pn, rt);
}
out:
- if (err)
+ if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+ /*
+ * If fib6_add_1 has cleared the old leaf pointer in the
+ * super-tree leaf node we have to find a new one for it.
+ */
+ if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn->leaf = fib6_find_prefix(pn);
+#if RT6_DEBUG >= 2
+ if (!pn->leaf) {
+ BUG_TRAP(pn->leaf != NULL);
+ pn->leaf = &ip6_null_entry;
+ }
+#endif
+ atomic_inc(&pn->leaf->rt6i_ref);
+ }
+#endif
dst_free(&rt->u.dst);
+ }
return err;
#ifdef CONFIG_IPV6_SUBTREES
@@ -544,6 +828,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
struct fib6_node *fn;
int dir;
+ if (unlikely(args->offset == 0))
+ return NULL;
+
/*
* Descend on a tree
*/
@@ -565,33 +852,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
break;
}
- while ((fn->fn_flags & RTN_ROOT) == 0) {
-#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree) {
- struct fib6_node *st;
- struct lookup_args *narg;
-
- narg = args + 1;
-
- if (narg->addr) {
- st = fib6_lookup_1(fn->subtree, narg);
-
- if (st && !(st->fn_flags & RTN_ROOT))
- return st;
- }
- }
-#endif
-
- if (fn->fn_flags & RTN_RTINFO) {
+ while(fn) {
+ if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
struct rt6key *key;
key = (struct rt6key *) ((u8 *) fn->leaf +
args->offset);
- if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
- return fn;
+ if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+ if (fn->subtree)
+ fn = fib6_lookup_1(fn->subtree, args + 1);
+#endif
+ if (!fn || fn->fn_flags & RTN_RTINFO)
+ return fn;
+ }
}
+ if (fn->fn_flags & RTN_ROOT)
+ break;
+
fn = fn->parent;
}
@@ -601,18 +881,24 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
struct in6_addr *saddr)
{
- struct lookup_args args[2];
struct fib6_node *fn;
-
- args[0].offset = offsetof(struct rt6_info, rt6i_dst);
- args[0].addr = daddr;
-
+ struct lookup_args args[] = {
+ {
+ .offset = offsetof(struct rt6_info, rt6i_dst),
+ .addr = daddr,
+ },
#ifdef CONFIG_IPV6_SUBTREES
- args[1].offset = offsetof(struct rt6_info, rt6i_src);
- args[1].addr = saddr;
+ {
+ .offset = offsetof(struct rt6_info, rt6i_src),
+ .addr = saddr,
+ },
#endif
+ {
+ .offset = 0, /* sentinel */
+ }
+ };
- fn = fib6_lookup_1(root, args);
+ fn = fib6_lookup_1(root, daddr ? args : args + 1);
if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
fn = root;
@@ -668,10 +954,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
BUG_TRAP(saddr!=NULL);
- if (fn == NULL)
- fn = fn->subtree;
- if (fn)
- fn = fib6_locate_1(fn, saddr, src_len,
+ if (fn && fn->subtree)
+ fn = fib6_locate_1(fn->subtree, saddr, src_len,
offsetof(struct rt6_info, rt6i_src));
}
#endif
@@ -700,7 +984,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
if(fn->right)
return fn->right->leaf;
- fn = SUBTREE(fn);
+ fn = FIB6_SUBTREE(fn);
}
return NULL;
}
@@ -731,7 +1015,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
if (fn->right) child = fn->right, children |= 1;
if (fn->left) child = fn->left, children |= 2;
- if (children == 3 || SUBTREE(fn)
+ if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
/* Subtree root (i.e. fn) may have one child */
|| (children && fn->fn_flags&RTN_ROOT)
@@ -750,9 +1034,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
- if (SUBTREE(pn) == fn) {
+ if (FIB6_SUBTREE(pn) == fn) {
BUG_TRAP(fn->fn_flags&RTN_ROOT);
- SUBTREE(pn) = NULL;
+ FIB6_SUBTREE(pn) = NULL;
nstate = FWS_L;
} else {
BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
@@ -800,7 +1084,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
read_unlock(&fib6_walker_lock);
node_free(fn);
- if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+ if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
rt6_release(pn->leaf);
@@ -810,7 +1094,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
}
static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
- struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+ struct nl_info *info)
{
struct fib6_walker_t *w;
struct rt6_info *rt = *rtp;
@@ -866,11 +1150,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (atomic_read(&rt->rt6i_ref) != 1) BUG();
}
- inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
+ inet6_rt_notify(RTM_DELROUTE, rt, info);
rt6_release(rt);
}
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
struct fib6_node *fn = rt->rt6i_node;
struct rt6_info **rtp;
@@ -886,8 +1170,18 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
BUG_TRAP(fn->fn_flags&RTN_RTINFO);
- if (!(rt->rt6i_flags&RTF_CACHE))
- fib6_prune_clones(fn, rt);
+ if (!(rt->rt6i_flags&RTF_CACHE)) {
+ struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ /* clones of this route might be in another subtree */
+ if (rt->rt6i_src.plen) {
+ while (!(pn->fn_flags&RTN_ROOT))
+ pn = pn->parent;
+ pn = pn->parent;
+ }
+#endif
+ fib6_prune_clones(pn, rt);
+ }
/*
* Walk the leaf entries looking for ourself
@@ -895,7 +1189,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
if (*rtp == rt) {
- fib6_del_route(fn, rtp, nlh, _rtattr, req);
+ fib6_del_route(fn, rtp, info);
return 0;
}
}
@@ -926,7 +1220,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
* <0 -> walk is terminated by an error.
*/
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
{
struct fib6_node *fn, *pn;
@@ -943,8 +1237,8 @@ int fib6_walk_continue(struct fib6_walker_t *w)
switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
case FWS_S:
- if (SUBTREE(fn)) {
- w->node = SUBTREE(fn);
+ if (FIB6_SUBTREE(fn)) {
+ w->node = FIB6_SUBTREE(fn);
continue;
}
w->state = FWS_L;
@@ -978,7 +1272,7 @@ int fib6_walk_continue(struct fib6_walker_t *w)
pn = fn->parent;
w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
- if (SUBTREE(pn) == fn) {
+ if (FIB6_SUBTREE(pn) == fn) {
BUG_TRAP(fn->fn_flags&RTN_ROOT);
w->state = FWS_L;
continue;
@@ -1000,7 +1294,7 @@ int fib6_walk_continue(struct fib6_walker_t *w)
}
}
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
{
int res;
@@ -1024,7 +1318,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
- res = fib6_del(rt, NULL, NULL, NULL);
+ res = fib6_del(rt, NULL);
if (res) {
#if RT6_DEBUG >= 2
printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1050,9 +1344,9 @@ static int fib6_clean_node(struct fib6_walker_t *w)
* ignoring pure split nodes) will be scanned.
*/
-void fib6_clean_tree(struct fib6_node *root,
- int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+ int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
{
struct fib6_cleaner_t c;
@@ -1065,6 +1359,25 @@ void fib6_clean_tree(struct fib6_node *root,
fib6_walk(&c.w);
}
+void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_table *table;
+ struct hlist_node *node;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
+ tb6_hlist) {
+ write_lock_bh(&table->tb6_lock);
+ fib6_clean_tree(&table->tb6_root, func, prune, arg);
+ write_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
static int fib6_prune_clone(struct rt6_info *rt, void *arg)
{
if (rt->rt6i_flags & RTF_CACHE) {
@@ -1143,11 +1456,8 @@ void fib6_run_gc(unsigned long dummy)
}
gc_args.more = 0;
-
- write_lock_bh(&rt6_lock);
ndisc_dst_gc(&gc_args.more);
- fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
- write_unlock_bh(&rt6_lock);
+ fib6_clean_all(fib6_age, 0, NULL);
if (gc_args.more)
mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@@ -1162,10 +1472,10 @@ void __init fib6_init(void)
{
fib6_node_kmem = kmem_cache_create("fib6_nodes",
sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!fib6_node_kmem)
- panic("cannot create fib6_nodes cache");
+
+ fib6_tables_init();
}
void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f9ca63912fbf..1d672b0547f2 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -10,7 +10,6 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aceee252503d..6b8e6d76a58b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -71,6 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto out;
}
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+
/*
* Store incoming device index. When the packet will
* be queued, we cannot refer to skb->dev anymore.
@@ -84,14 +86,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
*/
IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
- if (skb->len < sizeof(struct ipv6hdr))
+ if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
- goto drop;
- }
-
hdr = skb->nh.ipv6h;
if (hdr->version != 6)
@@ -114,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
}
if (hdr->nexthdr == NEXTHDR_HOP) {
- if (ipv6_parse_hopopts(skb) < 0) {
+ if (ipv6_parse_hopopts(&skb) < 0) {
IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index abb94de33768..66716911962e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -28,7 +28,6 @@
* for datagram xmit
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -148,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb)
int ip6_output(struct sk_buff *skb)
{
- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
+ if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
else
@@ -230,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb->priority = sk->sk_priority;
mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || ipfragok) {
+ if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
dst_output);
@@ -309,6 +308,56 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
return 0;
}
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+ struct ipv6hdr *hdr = skb->nh.ipv6h;
+ u8 nexthdr = hdr->nexthdr;
+ int offset;
+
+ if (ipv6_ext_hdr(nexthdr)) {
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+ if (offset < 0)
+ return 0;
+ } else
+ offset = sizeof(struct ipv6hdr);
+
+ if (nexthdr == IPPROTO_ICMPV6) {
+ struct icmp6hdr *icmp6;
+
+ if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+ return 0;
+
+ icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+
+ switch (icmp6->icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ /* For reaction involving unicast neighbor discovery
+ * message destined to the proxied address, pass it to
+ * input function.
+ */
+ return 1;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * The proxying router can't forward traffic sent to a link-local
+ * address, so signal the sender and discard the packet. This
+ * behavior is clarified by the MIPv6 specification.
+ */
+ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+ dst_link_failure(skb);
+ return -1;
+ }
+
+ return 0;
+}
+
static inline int ip6_forward_finish(struct sk_buff *skb)
{
return dst_output(skb);
@@ -357,11 +406,24 @@ int ip6_forward(struct sk_buff *skb)
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0, skb->dev);
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
}
+ /* XXX: idev->cnf.proxy_ndp? */
+ if (ipv6_devconf.proxy_ndp &&
+ pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+ int proxied = ip6_forward_proxy_check(skb);
+ if (proxied > 0)
+ return ip6_input(skb);
+ else if (proxied < 0) {
+ IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ }
+
if (!xfrm6_route_forward(skb)) {
IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -475,17 +537,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
switch (**nexthdr) {
case NEXTHDR_HOP:
+ break;
case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
case NEXTHDR_DEST:
- if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
- if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+#ifdef CONFIG_IPV6_MIP6
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+#endif
+ if (found_rhdr)
+ return offset;
break;
default :
return offset;
}
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
}
return offset;
@@ -596,6 +666,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
}
err = output(skb);
+ if(!err)
+ IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+
if (err || !frag)
break;
@@ -707,12 +780,11 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
-
- IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
err = output(frag);
if (err)
goto fail;
+
+ IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
@@ -724,48 +796,59 @@ fail:
return err;
}
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+static inline int ip6_rt_check(struct rt6key *rt_key,
+ struct in6_addr *fl_addr,
+ struct in6_addr *addr_cache)
{
- int err = 0;
+ return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+ (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
+}
- *dst = NULL;
- if (sk) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- *dst = sk_dst_check(sk, np->dst_cookie);
- if (*dst) {
- struct rt6_info *rt = (struct rt6_info*)*dst;
-
- /* Yes, checking route validity in not connected
- * case is not very simple. Take into account,
- * that we do not support routing by source, TOS,
- * and MSG_DONTROUTE --ANK (980726)
- *
- * 1. If route was host route, check that
- * cached destination is current.
- * If it is network route, we still may
- * check its validity using saved pointer
- * to the last used address: daddr_cache.
- * We do not want to save whole address now,
- * (because main consumer of this service
- * is tcp, which has not this problem),
- * so that the last trick works only on connected
- * sockets.
- * 2. oif also should be the same.
- */
- if (((rt->rt6i_dst.plen != 128 ||
- !ipv6_addr_equal(&fl->fl6_dst,
- &rt->rt6i_dst.addr))
- && (np->daddr_cache == NULL ||
- !ipv6_addr_equal(&fl->fl6_dst,
- np->daddr_cache)))
- || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
- dst_release(*dst);
- *dst = NULL;
- }
- }
+static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+ struct dst_entry *dst,
+ struct flowi *fl)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+
+ if (!dst)
+ goto out;
+
+ /* Yes, checking route validity in not connected
+ * case is not very simple. Take into account,
+ * that we do not support routing by source, TOS,
+ * and MSG_DONTROUTE --ANK (980726)
+ *
+ * 1. ip6_rt_check(): If route was host route,
+ * check that cached destination is current.
+ * If it is network route, we still may
+ * check its validity using saved pointer
+ * to the last used address: daddr_cache.
+ * We do not want to save whole address now,
+ * (because main consumer of this service
+ * is tcp, which has not this problem),
+ * so that the last trick works only on connected
+ * sockets.
+ * 2. oif also should be the same.
+ */
+ if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+ ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+#endif
+ (fl->oif && fl->oif != dst->dev->ifindex)) {
+ dst_release(dst);
+ dst = NULL;
}
+out:
+ return dst;
+}
+
+static int ip6_dst_lookup_tail(struct sock *sk,
+ struct dst_entry **dst, struct flowi *fl)
+{
+ int err;
+
if (*dst == NULL)
*dst = ip6_route_output(sk, fl);
@@ -774,7 +857,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
if (ipv6_addr_any(&fl->fl6_src)) {
err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
-
if (err)
goto out_err_release;
}
@@ -787,8 +869,48 @@ out_err_release:
return err;
}
+/**
+ * ip6_dst_lookup - perform route lookup on flow
+ * @sk: socket which provides route info
+ * @dst: pointer to dst_entry * for result
+ * @fl: flow to lookup
+ *
+ * This function performs a route lookup on the given flow.
+ *
+ * It returns zero on success, or a standard errno code on error.
+ */
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+{
+ *dst = NULL;
+ return ip6_dst_lookup_tail(sk, dst, fl);
+}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+/**
+ * ip6_sk_dst_lookup - perform socket cached route lookup on flow
+ * @sk: socket which provides the dst cache and route info
+ * @dst: pointer to dst_entry * for result
+ * @fl: flow to lookup
+ *
+ * This function performs a route lookup on the given flow with the
+ * possibility of using the cached route in the socket if it is valid.
+ * It will take the socket dst lock when operating on the dst cache.
+ * As a result, this function can only be used in process context.
+ *
+ * It returns zero on success, or a standard errno code on error.
+ */
+int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+{
+ *dst = NULL;
+ if (sk) {
+ *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+ *dst = ip6_sk_dst_check(sk, *dst, fl);
+ }
+
+ return ip6_dst_lookup_tail(sk, dst, fl);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
+
static inline int ip6_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
@@ -822,7 +944,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->h.raw = skb->data + fragheaderlen;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
sk->sk_sndmsg_off = 0;
}
@@ -835,7 +957,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
sizeof(struct frag_hdr);
- skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
ipv6_select_ident(skb, &fhdr);
skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
@@ -919,7 +1041,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
- fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+ fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
@@ -1051,7 +1173,7 @@ alloc_new_skb:
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
- skb_trim(skb_prev, maxfraglen);
+ pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
if (copy < 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a995796b5a57..84d7ebdb9d21 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -19,7 +19,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
@@ -568,10 +567,9 @@ static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
int opt_len = sizeof(*opt) + 8;
- if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
+ if (!(opt = kzalloc(opt_len, GFP_ATOMIC))) {
return NULL;
}
- memset(opt, 0, opt_len);
opt->tot_len = opt_len;
opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
opt->opt_nflen = 8;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index f28cd37feed3..a2860e35efd7 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -30,7 +30,6 @@
* The decompression of IP datagram MUST be done after the reassembly,
* AH/ESP processing.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -54,7 +53,7 @@
struct ipcomp6_tfms {
struct list_head list;
- struct crypto_tfm **tfms;
+ struct crypto_comp **tfms;
int users;
};
@@ -71,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
int plen, dlen;
struct ipcomp_data *ipcd = x->data;
u8 *start, *scratch;
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
int cpu;
if (skb_linearize_cow(skb))
@@ -110,7 +109,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out_put_cpu;
}
- skb_put(skb, dlen - plen);
+ skb->truesize += dlen - plen;
+ __skb_put(skb, dlen - plen);
memcpy(skb->data, scratch, dlen);
err = ipch->nexthdr;
@@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ipcomp_data *ipcd = x->data;
int plen, dlen;
u8 *start, *scratch;
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
int cpu;
hdr_len = skb->h.raw - skb->data;
@@ -178,7 +178,7 @@ out_ok:
static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int type, int code, int offset, __u32 info)
{
- u32 spi;
+ __be32 spi;
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
struct xfrm_state *x;
@@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET6;
- t->props.mode = 1;
+ t->props.mode = XFRM_MODE_TUNNEL;
memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
if (xfrm_init_state(t))
@@ -234,7 +234,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x)
{
int err = 0;
struct xfrm_state *t = NULL;
- u32 spi;
+ __be32 spi;
spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
if (spi)
@@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void)
return scratches;
}
-static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp6_free_tfms(struct crypto_comp **tfms)
{
struct ipcomp6_tfms *pos;
int cpu;
@@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
return;
for_each_possible_cpu(cpu) {
- struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
- crypto_free_tfm(tfm);
+ struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+ crypto_free_comp(tfm);
}
free_percpu(tfms);
}
-static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
{
struct ipcomp6_tfms *pos;
- struct crypto_tfm **tfms;
+ struct crypto_comp **tfms;
int cpu;
/* This can be any valid CPU ID so we don't need locking. */
cpu = raw_smp_processor_id();
list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
- struct crypto_tfm *tfm;
+ struct crypto_comp *tfm;
tfms = pos->tfms;
tfm = *per_cpu_ptr(tfms, cpu);
- if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+ if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
return tfms;
}
@@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
INIT_LIST_HEAD(&pos->list);
list_add(&pos->list, &ipcomp6_tfms_list);
- pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+ pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
if (!tfms)
goto error;
for_each_possible_cpu(cpu) {
- struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+ struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+ CRYPTO_ALG_ASYNC);
if (!tfm)
goto error;
*per_cpu_ptr(tfms, cpu) = tfm;
@@ -416,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
goto out;
x->props.header_len = 0;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct ipv6hdr);
mutex_lock(&ipcomp6_resource_mutex);
@@ -428,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
goto error;
mutex_unlock(&ipcomp6_resource_mutex);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = ipcomp6_tunnel_attach(x);
if (err)
goto error_tunnel;
@@ -460,6 +461,7 @@ static struct xfrm_type ipcomp6_type =
.destructor = ipcomp6_destroy,
.input = ipcomp6_input,
.output = ipcomp6_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
};
static struct inet6_protocol ipcomp6_protocol =
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4c20eeb3d568..de6b91981b30 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -58,9 +57,116 @@
DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
+static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb,
+ int proto)
+{
+ struct inet6_protocol *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_protos[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = opth->hdrlen * 8 + 8;
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return ops;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ struct ipv6hdr *ipv6h;
+ struct inet6_protocol *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = skb->nh.ipv6h;
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (likely(ops && ops->gso_send_check)) {
+ skb->h.raw = skb->data;
+ err = ops->gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ struct inet6_protocol *ops;
+
+ if (!(features & NETIF_F_HW_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = skb->nh.ipv6h;
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ rcu_read_lock();
+ ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (likely(ops && ops->gso_segment)) {
+ skb->h.raw = skb->data;
+ segs = ops->gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (unlikely(IS_ERR(segs)))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = skb->nh.ipv6h;
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ }
+
+out:
+ return segs;
+}
+
static struct packet_type ipv6_packet_type = {
.type = __constant_htons(ETH_P_IPV6),
.func = ipv6_rcv,
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
};
struct ip6_ra_chain *ip6_ra_chain;
@@ -259,7 +365,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TCLASS:
- if (val < 0 || val > 0xff)
+ if (val < -1 || val > 0xff)
goto e_inval;
np->tclass = val;
retv = 0;
@@ -304,8 +410,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* routing header option needs extra check */
if (optname == IPV6_RTHDR && opt->srcrt) {
struct ipv6_rt_hdr *rthdr = opt->srcrt;
- if (rthdr->type)
+ switch (rthdr->type) {
+ case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+#endif
+ break;
+ default:
goto sticky_done;
+ }
+
if ((rthdr->hdrlen & 1) ||
(rthdr->hdrlen >> 1) != rthdr->segments_left)
goto sticky_done;
@@ -844,6 +958,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_TCLASS:
val = np->tclass;
+ if (val < 0)
+ val = 0;
break;
case IPV6_RECVTCLASS:
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 16482785bdfd..0e8e0676a033 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -1,5 +1,4 @@
-#include <linux/config.h>
#include <linux/module.h>
#include <net/protocol.h>
#include <net/ipv6.h>
@@ -15,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map);
EXPORT_SYMBOL(register_inet6addr_notifier);
EXPORT_SYMBOL(unregister_inet6addr_notifier);
EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(addrconf_lock);
EXPORT_SYMBOL(ipv6_setsockopt);
EXPORT_SYMBOL(ipv6_getsockopt);
EXPORT_SYMBOL(inet6_register_protosw);
@@ -32,6 +30,8 @@ EXPORT_SYMBOL(ipv6_chk_addr);
EXPORT_SYMBOL(in6_dev_finish_destroy);
#ifdef CONFIG_XFRM
EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_input_addr);
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
#endif
EXPORT_SYMBOL(rt6_lookup);
EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6e871afbb2c7..3b114e3fa2f8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -28,7 +28,6 @@
* - MLDv2 support
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -172,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
#define IPV6_MLD_MAX_MSF 64
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
/*
* socket join on multicast group
@@ -269,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
struct inet6_dev *idev = in6_dev_get(dev);
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
if (idev) {
- (void) ip6_mc_leave_src(sk,mc_lst,idev);
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
in6_dev_put(idev);
}
dev_put(dev);
- }
+ } else
+ (void) ip6_mc_leave_src(sk, mc_lst, NULL);
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return 0;
}
@@ -335,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk)
if (dev) {
struct inet6_dev *idev = in6_dev_get(dev);
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
if (idev) {
- (void) ip6_mc_leave_src(sk, mc_lst, idev);
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
in6_dev_put(idev);
}
dev_put(dev);
- }
+ } else
+ (void) ip6_mc_leave_src(sk, mc_lst, NULL);
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 000000000000..99d116caecda
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * Authors:
+ * Noriaki TAKAMIYA @USAGI
+ * Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/time.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+#include <net/mip6.h>
+
+static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
+{
+ return x->coaddr;
+}
+
+static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
+{
+ return (n - len + 16) & 0x7;
+}
+
+static inline void *mip6_padn(__u8 *data, __u8 padlen)
+{
+ if (!data)
+ return NULL;
+ if (padlen == 1) {
+ data[0] = MIP6_OPT_PAD_1;
+ } else if (padlen > 1) {
+ data[0] = MIP6_OPT_PAD_N;
+ data[1] = padlen - 2;
+ if (padlen > 2)
+ memset(data+2, 0, data[1]);
+ }
+ return data + padlen;
+}
+
+static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
+{
+ icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+}
+
+static int mip6_mh_len(int type)
+{
+ int len = 0;
+
+ switch (type) {
+ case IP6_MH_TYPE_BRR:
+ len = 0;
+ break;
+ case IP6_MH_TYPE_HOTI:
+ case IP6_MH_TYPE_COTI:
+ case IP6_MH_TYPE_BU:
+ case IP6_MH_TYPE_BACK:
+ len = 1;
+ break;
+ case IP6_MH_TYPE_HOT:
+ case IP6_MH_TYPE_COT:
+ case IP6_MH_TYPE_BERROR:
+ len = 2;
+ break;
+ }
+ return len;
+}
+
+int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct ip6_mh *mh;
+ int mhlen;
+
+ if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
+ !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+ return -1;
+
+ mh = (struct ip6_mh *)skb->h.raw;
+
+ if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
+ mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+ mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+ return -1;
+ }
+ mhlen = (mh->ip6mh_hdrlen + 1) << 3;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr,
+ mhlen, IPPROTO_MH,
+ skb->csum)) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n");
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ }
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr,
+ mhlen, IPPROTO_MH,
+ skb_checksum(skb, 0, mhlen, 0))) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed "
+ "[" NIP6_FMT " > " NIP6_FMT "]\n",
+ NIP6(skb->nh.ipv6h->saddr),
+ NIP6(skb->nh.ipv6h->daddr));
+ return -1;
+ }
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ if (mh->ip6mh_proto != IPPROTO_NONE) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
+ mh->ip6mh_proto);
+ mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+ return -1;
+ }
+
+ return 0;
+}
+
+struct mip6_report_rate_limiter {
+ spinlock_t lock;
+ struct timeval stamp;
+ int iif;
+ struct in6_addr src;
+ struct in6_addr dst;
+};
+
+static struct mip6_report_rate_limiter mip6_report_rl = {
+ .lock = SPIN_LOCK_UNLOCKED
+};
+
+static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+
+ if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+ !ipv6_addr_any((struct in6_addr *)x->coaddr))
+ return -ENOENT;
+
+ return destopt->nexthdr;
+}
+
+/* Destination Option Header is inserted.
+ * IP Header's src address is replaced with Home Address Option in
+ * Destination Option Header.
+ */
+static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ struct ipv6_destopt_hdr *dstopt;
+ struct ipv6_destopt_hao *hao;
+ u8 nexthdr;
+ int len;
+
+ iph = (struct ipv6hdr *)skb->data;
+ iph->payload_len = htons(skb->len - sizeof(*iph));
+
+ nexthdr = *skb->nh.raw;
+ *skb->nh.raw = IPPROTO_DSTOPTS;
+
+ dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+ dstopt->nexthdr = nexthdr;
+
+ hao = mip6_padn((char *)(dstopt + 1),
+ calc_padlen(sizeof(*dstopt), 6));
+
+ hao->type = IPV6_TLV_HAO;
+ hao->length = sizeof(*hao) - 2;
+ BUG_TRAP(hao->length == 16);
+
+ len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
+
+ memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
+ memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+
+ BUG_TRAP(len == x->props.header_len);
+ dstopt->hdrlen = (x->props.header_len >> 3) - 1;
+
+ return 0;
+}
+
+static inline int mip6_report_rl_allow(struct timeval *stamp,
+ struct in6_addr *dst,
+ struct in6_addr *src, int iif)
+{
+ int allow = 0;
+
+ spin_lock_bh(&mip6_report_rl.lock);
+ if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
+ mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+ mip6_report_rl.iif != iif ||
+ !ipv6_addr_equal(&mip6_report_rl.src, src) ||
+ !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
+ mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
+ mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+ mip6_report_rl.iif = iif;
+ ipv6_addr_copy(&mip6_report_rl.src, src);
+ ipv6_addr_copy(&mip6_report_rl.dst, dst);
+ allow = 1;
+ }
+ spin_unlock_bh(&mip6_report_rl.lock);
+ return allow;
+}
+
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+{
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ struct ipv6_destopt_hao *hao = NULL;
+ struct xfrm_selector sel;
+ int offset;
+ struct timeval stamp;
+ int err = 0;
+
+ if (unlikely(fl->proto == IPPROTO_MH &&
+ fl->fl_mh_type <= IP6_MH_TYPE_MAX))
+ goto out;
+
+ if (likely(opt->dsthao)) {
+ offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+ if (likely(offset >= 0))
+ hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+ }
+
+ skb_get_timestamp(skb, &stamp);
+
+ if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
+ hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+ opt->iif))
+ goto out;
+
+ memset(&sel, 0, sizeof(sel));
+ memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+ sizeof(sel.daddr));
+ sel.prefixlen_d = 128;
+ memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ sizeof(sel.saddr));
+ sel.prefixlen_s = 128;
+ sel.family = AF_INET6;
+ sel.proto = fl->proto;
+ sel.dport = xfrm_flowi_dport(fl);
+ if (sel.dport)
+ sel.dport_mask = ~((__u16)0);
+ sel.sport = xfrm_flowi_sport(fl);
+ if (sel.sport)
+ sel.sport_mask = ~((__u16)0);
+ sel.ifindex = fl->oif;
+
+ err = km_report(IPPROTO_DSTOPTS, &sel,
+ (hao ? (xfrm_address_t *)&hao->addr : NULL));
+
+ out:
+ return err;
+}
+
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+ unsigned int packet_len = skb->tail - skb->nh.raw;
+ int found_rhdr = 0;
+
+ *nexthdr = &skb->nh.ipv6h->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ /*
+ * HAO MUST NOT appear more than once.
+ * XXX: It is better to try to find by the end of
+ * XXX: packet if HAO exists.
+ */
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+ LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+ return offset;
+ }
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ }
+
+ return offset;
+}
+
+static int mip6_destopt_init_state(struct xfrm_state *x)
+{
+ if (x->id.spi) {
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+ x->id.spi);
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+ printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+ __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ return -EINVAL;
+ }
+
+ x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
+ calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
+ sizeof(struct ipv6_destopt_hao);
+ BUG_TRAP(x->props.header_len == 24);
+
+ return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for
+ * destination options header unlike IPsec protocols.
+ */
+static void mip6_destopt_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_destopt_type =
+{
+ .description = "MIP6DESTOPT",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_DSTOPTS,
+ .flags = XFRM_TYPE_NON_FRAGMENT,
+ .init_state = mip6_destopt_init_state,
+ .destructor = mip6_destopt_destroy,
+ .input = mip6_destopt_input,
+ .output = mip6_destopt_output,
+ .reject = mip6_destopt_reject,
+ .hdr_offset = mip6_destopt_offset,
+ .local_addr = mip6_xfrm_addr,
+};
+
+static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+
+ if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+ !ipv6_addr_any((struct in6_addr *)x->coaddr))
+ return -ENOENT;
+
+ return rt2->rt_hdr.nexthdr;
+}
+
+/* Routing Header type 2 is inserted.
+ * IP Header's dst address is replaced with Routing Header's Home Address.
+ */
+static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ struct rt2_hdr *rt2;
+ u8 nexthdr;
+
+ iph = (struct ipv6hdr *)skb->data;
+ iph->payload_len = htons(skb->len - sizeof(*iph));
+
+ nexthdr = *skb->nh.raw;
+ *skb->nh.raw = IPPROTO_ROUTING;
+
+ rt2 = (struct rt2_hdr *)skb->h.raw;
+ rt2->rt_hdr.nexthdr = nexthdr;
+ rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
+ rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
+ rt2->rt_hdr.segments_left = 1;
+ memset(&rt2->reserved, 0, sizeof(rt2->reserved));
+
+ BUG_TRAP(rt2->rt_hdr.hdrlen == 2);
+
+ memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
+ memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+
+ return 0;
+}
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+ unsigned int packet_len = skb->tail - skb->nh.raw;
+ int found_rhdr = 0;
+
+ *nexthdr = &skb->nh.ipv6h->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ if (offset + 3 <= packet_len) {
+ struct ipv6_rt_hdr *rt;
+ rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+ if (rt->type != 0)
+ return offset;
+ }
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ return offset;
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ }
+
+ return offset;
+}
+
+static int mip6_rthdr_init_state(struct xfrm_state *x)
+{
+ if (x->id.spi) {
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+ x->id.spi);
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+ printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+ __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ return -EINVAL;
+ }
+
+ x->props.header_len = sizeof(struct rt2_hdr);
+
+ return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for routing
+ * header type 2 unlike IPsec protocols.
+ */
+static void mip6_rthdr_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_rthdr_type =
+{
+ .description = "MIP6RT",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ROUTING,
+ .flags = XFRM_TYPE_NON_FRAGMENT,
+ .init_state = mip6_rthdr_init_state,
+ .destructor = mip6_rthdr_destroy,
+ .input = mip6_rthdr_input,
+ .output = mip6_rthdr_output,
+ .hdr_offset = mip6_rthdr_offset,
+ .remote_addr = mip6_xfrm_addr,
+};
+
+int __init mip6_init(void)
+{
+ printk(KERN_INFO "Mobile IPv6\n");
+
+ if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
+ printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__);
+ goto mip6_destopt_xfrm_fail;
+ }
+ if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
+ printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+ goto mip6_rthdr_xfrm_fail;
+ }
+ return 0;
+
+ mip6_rthdr_xfrm_fail:
+ xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
+ return -EAGAIN;
+}
+
+void __exit mip6_fini(void)
+{
+ if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+ if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
+}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index dfa20d3be9b6..0304b5fe8d6a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -48,7 +48,6 @@
#endif
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -63,6 +62,7 @@
#include <linux/sysctl.h>
#endif
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
@@ -412,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n)
*/
static inline void ndisc_flow_init(struct flowi *fl, u8 type,
- struct in6_addr *saddr, struct in6_addr *daddr)
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ int oif)
{
memset(fl, 0, sizeof(*fl));
ipv6_addr_copy(&fl->fl6_src, saddr);
@@ -420,6 +421,8 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
fl->proto = IPPROTO_ICMPV6;
fl->fl_icmp_type = type;
fl->fl_icmp_code = 0;
+ fl->oif = oif;
+ security_sk_classify_flow(ndisc_socket->sk, fl);
}
static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
@@ -451,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
src_addr = &tmpaddr;
}
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+ ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+ dev->ifindex);
dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
if (!dst)
@@ -492,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
msg->icmph.icmp6_unused = 0;
msg->icmph.icmp6_router = router;
msg->icmph.icmp6_solicited = solicited;
- msg->icmph.icmp6_override = !!override;
+ msg->icmph.icmp6_override = override;
/* Set the target address. */
ipv6_addr_copy(&msg->target, solicited_addr);
@@ -541,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
saddr = &addr_buf;
}
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+ ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
+ dev->ifindex);
dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
if (!dst)
@@ -616,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
int len;
int err;
- ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+ ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
+ dev->ifindex);
dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
if (!dst)
@@ -730,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb)
struct inet6_ifaddr *ifp;
struct inet6_dev *idev = NULL;
struct neighbour *neigh;
+ struct pneigh_entry *pneigh = NULL;
int dad = ipv6_addr_any(saddr);
int inc;
+ int is_router;
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK2(KERN_WARNING
@@ -816,7 +824,9 @@ static void ndisc_recv_ns(struct sk_buff *skb)
if (ipv6_chk_acast_addr(dev, &msg->target) ||
(idev->cnf.forwarding &&
- pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+ (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+ (pneigh = pneigh_lookup(&nd_tbl,
+ &msg->target, dev, 0)) != NULL)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
inc != 0 &&
@@ -837,12 +847,14 @@ static void ndisc_recv_ns(struct sk_buff *skb)
goto out;
}
+ is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding);
+
if (dad) {
struct in6_addr maddr;
ipv6_addr_all_nodes(&maddr);
ndisc_send_na(dev, NULL, &maddr, &msg->target,
- idev->cnf.forwarding, 0, (ifp != NULL), 1);
+ is_router, 0, (ifp != NULL), 1);
goto out;
}
@@ -863,7 +875,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NEIGH_UPDATE_F_OVERRIDE);
if (neigh || !dev->hard_header) {
ndisc_send_na(dev, neigh, saddr, &msg->target,
- idev->cnf.forwarding,
+ is_router,
1, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
@@ -946,6 +958,20 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (neigh->nud_state & NUD_FAILED)
goto out;
+ /*
+ * Don't update the neighbor cache entry on a proxy NA from
+ * ourselves because either the proxied node is off link or it
+ * has already sent a NA to us.
+ */
+ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+ ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+ pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+ /* XXX: idev->cnf.prixy_ndp */
+ WARN_ON(skb->dst != NULL &&
+ ((struct rt6_info *)skb->dst)->rt6i_idev);
+ goto out;
+ }
+
neigh_update(neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
@@ -960,7 +986,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct rt6_info *rt;
rt = rt6_get_dflt_router(saddr, dev);
if (rt)
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
}
out:
@@ -1113,7 +1139,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt && lifetime == 0) {
neigh_clone(neigh);
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
rt = NULL;
}
@@ -1345,7 +1371,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
if (neigh) {
- rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr,
+ rt6_redirect(dest, &skb->nh.ipv6h->daddr,
+ &skb->nh.ipv6h->saddr, neigh, lladdr,
on_link);
neigh_release(neigh);
}
@@ -1381,7 +1408,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
return;
}
- ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+ ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+ dev->ifindex);
dst = ip6_route_output(NULL, &fl);
if (dst == NULL)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 395a417ba955..580b1aba6722 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int csum = 0;
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
break;
if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index eeeb57d4c9c5..ac1dfebde175 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -5,7 +5,7 @@
# Link order matters here.
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
-obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index b4b7d441af25..9510c24ca8d2 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -56,15 +56,15 @@ struct ipq_queue_entry {
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);
@@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
break;
case IPQ_COPY_PACKET:
- if (entry->skb->ip_summed == CHECKSUM_HW &&
- (*errp = skb_checksum_help(entry->skb,
- entry->info->outdev == NULL))) {
+ if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+ entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+ (*errp = skb_checksum_help(entry->skb))) {
read_unlock_bh(&queue_lock);
return NULL;
}
@@ -505,7 +505,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (type <= IPQM_BASE)
return;
- if (security_netlink_recv(skb))
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
write_lock_bh(&queue_lock);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2e72f89a7019..4ab368fa0b8f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -19,13 +19,13 @@
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
+#include <linux/poison.h>
#include <linux/icmpv6.h>
#include <net/ipv6.h>
#include <asm/uaccess.h>
@@ -70,9 +70,6 @@ do { \
#define IP_NF_ASSERT(x)
#endif
-
-#include <linux/netfilter_ipv4/listhelp.h>
-
#if 0
/* All the better to debug you with... */
#define static
@@ -220,8 +217,7 @@ ip6t_error(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
if (net_ratelimit())
printk("ip6_tables: error: `%s'\n", (char *)targinfo);
@@ -258,8 +254,7 @@ ip6t_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct xt_table *table,
- void *userdata)
+ struct xt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
int offset = 0;
@@ -349,8 +344,7 @@ ip6t_do_table(struct sk_buff **pskb,
in, out,
hook,
t->u.kernel.target,
- t->data,
- userdata);
+ t->data);
#ifdef CONFIG_NETFILTER_DEBUG
if (((struct ip6t_entry *)table_base)->comefrom
@@ -377,7 +371,7 @@ ip6t_do_table(struct sk_buff **pskb,
} while (!hotdrop);
#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
+ ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif
read_unlock_bh(&table->lock);
@@ -507,8 +501,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
return 1;
if (m->u.kernel.match->destroy)
- m->u.kernel.match->destroy(m->u.kernel.match, m->data,
- m->u.match_size - sizeof(*m));
+ m->u.kernel.match->destroy(m->u.kernel.match, m->data);
module_put(m->u.kernel.match->me);
return 0;
}
@@ -561,7 +554,6 @@ check_match(struct ip6t_entry_match *m,
if (m->u.kernel.match->checkentry
&& !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
- m->u.match_size - sizeof(*m),
hookmask)) {
duprintf("ip_tables: check failed for `%s'.\n",
m->u.kernel.match->name);
@@ -618,12 +610,10 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
if (t->u.kernel.target == &ip6t_standard_target) {
if (!standard_check(t, size)) {
ret = -EINVAL;
- goto cleanup_matches;
+ goto err;
}
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
- t->u.target_size
- - sizeof(*t),
e->comefrom)) {
duprintf("ip_tables: check failed for `%s'.\n",
t->u.kernel.target->name);
@@ -695,8 +685,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
t = ip6t_get_target(e);
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
return 0;
}
@@ -1281,7 +1270,8 @@ int ip6t_register_table(struct xt_table *table,
return ret;
}
- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+ ret = xt_register_table(table, &bootstrap, newinfo);
+ if (ret != 0) {
xt_free_table_info(newinfo);
return ret;
}
@@ -1351,7 +1341,6 @@ icmp6_checkentry(const char *tablename,
const void *entry,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ip6t_icmp *icmpinfo = matchinfo;
@@ -1397,23 +1386,39 @@ static int __init ip6_tables_init(void)
{
int ret;
- xt_proto_init(AF_INET6);
+ ret = xt_proto_init(AF_INET6);
+ if (ret < 0)
+ goto err1;
/* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(&ip6t_standard_target);
- xt_register_target(&ip6t_error_target);
- xt_register_match(&icmp6_matchstruct);
+ ret = xt_register_target(&ip6t_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&ip6t_error_target);
+ if (ret < 0)
+ goto err3;
+ ret = xt_register_match(&icmp6_matchstruct);
+ if (ret < 0)
+ goto err4;
/* Register setsockopt */
ret = nf_register_sockopt(&ip6t_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- xt_proto_fini(AF_INET6);
- return ret;
- }
+ if (ret < 0)
+ goto err5;
printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
return 0;
+
+err5:
+ xt_unregister_match(&icmp6_matchstruct);
+err4:
+ xt_unregister_target(&ip6t_error_target);
+err3:
+ xt_unregister_target(&ip6t_standard_target);
+err2:
+ xt_proto_fini(AF_INET6);
+err1:
+ return ret;
}
static void __exit ip6_tables_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index b8eff8ee69b1..435750f664dd 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -22,11 +22,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct ipv6hdr *ip6h;
const struct ip6t_HL_info *info = targinfo;
- u_int16_t diffs[2];
int new_hl;
if (!skb_make_writable(pskb, (*pskb)->len))
@@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
break;
}
- if (new_hl != ip6h->hop_limit) {
- diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
+ if (new_hl != ip6h->hop_limit)
ip6h->hop_limit = new_hl;
- diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
- }
return IP6T_CONTINUE;
}
@@ -66,7 +62,6 @@ static int ip6t_hl_checkentry(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ip6t_HL_info *info = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 73c6300109d6..0cf537d30185 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ip6t_log_info *loginfo = targinfo;
struct nf_loginfo li;
@@ -452,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip6t_log_info *loginfo = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index de1175c27f6d..311eae82feb3 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -15,7 +15,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmpv6.h>
@@ -97,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb)
ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
fl.fl_ip_sport = otcph.dest;
fl.fl_ip_dport = otcph.source;
+ security_skb_classify_flow(oldskb, &fl);
dst = ip6_route_output(NULL, &fl);
if (dst == NULL)
return;
@@ -180,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ip6t_reject_info *reject = targinfo;
@@ -224,7 +223,6 @@ static int check(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip6t_reject_info *rejinfo = targinfo;
@@ -257,9 +255,7 @@ static struct ip6t_target ip6t_reject_reg = {
static int __init ip6t_reject_init(void)
{
- if (ip6t_register_target(&ip6t_reject_reg))
- return -EINVAL;
- return 0;
+ return ip6t_register_target(&ip6t_reject_reg);
}
static void __exit ip6t_reject_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2f7bb20c758b..ec1b1608156c 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -102,7 +102,6 @@ checkentry(const char *tablename,
const void *entry,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ip6t_ah *ahinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
deleted file mode 100644
index 9422413d0571..000000000000
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Kernel module to match Hop-by-Hop and Destination parameters. */
-
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <asm/byteorder.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_opts.h>
-
-#define HOPBYHOP 0
-
-MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/*
- * (Type & 0xC0) >> 6
- * 0 -> ignorable
- * 1 -> must drop the packet
- * 2 -> send ICMP PARM PROB regardless and drop packet
- * 3 -> Send ICMP if not a multicast address and drop packet
- * (Type & 0x20) >> 5
- * 0 -> invariant
- * 1 -> can change the routing
- * (Type & 0x1F) Type
- * 0 -> Pad1 (only 1 byte!)
- * 1 -> PadN LENGTH info (total length = length + 2)
- * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k )
- * 5 -> RTALERT 2 x x
- */
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- struct ipv6_opt_hdr _optsh, *oh;
- const struct ip6t_opts *optinfo = matchinfo;
- unsigned int temp;
- unsigned int ptr;
- unsigned int hdrlen = 0;
- unsigned int ret = 0;
- u8 _opttype, *tp = NULL;
- u8 _optlen, *lp = NULL;
- unsigned int optlen;
-
-#if HOPBYHOP
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
- return 0;
-
- oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
- if (oh == NULL) {
- *hotdrop = 1;
- return 0;
- }
-
- hdrlen = ipv6_optlen(oh);
- if (skb->len - ptr < hdrlen) {
- /* Packet smaller than it's length field */
- return 0;
- }
-
- DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
- DEBUGP("len %02X %04X %02X ",
- optinfo->hdrlen, hdrlen,
- (!(optinfo->flags & IP6T_OPTS_LEN) ||
- ((optinfo->hdrlen == hdrlen) ^
- !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
- ret = (oh != NULL) &&
- (!(optinfo->flags & IP6T_OPTS_LEN) ||
- ((optinfo->hdrlen == hdrlen) ^
- !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
- ptr += 2;
- hdrlen -= 2;
- if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
- return ret;
- } else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
- DEBUGP("Not strict - not implemented");
- } else {
- DEBUGP("Strict ");
- DEBUGP("#%d ", optinfo->optsnr);
- for (temp = 0; temp < optinfo->optsnr; temp++) {
- /* type field exists ? */
- if (hdrlen < 1)
- break;
- tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
- &_opttype);
- if (tp == NULL)
- break;
-
- /* Type check */
- if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
- DEBUGP("Tbad %02X %02X\n",
- *tp,
- (optinfo->opts[temp] & 0xFF00) >> 8);
- return 0;
- } else {
- DEBUGP("Tok ");
- }
- /* Length check */
- if (*tp) {
- u16 spec_len;
-
- /* length field exists ? */
- if (hdrlen < 2)
- break;
- lp = skb_header_pointer(skb, ptr + 1,
- sizeof(_optlen),
- &_optlen);
- if (lp == NULL)
- break;
- spec_len = optinfo->opts[temp] & 0x00FF;
-
- if (spec_len != 0x00FF && spec_len != *lp) {
- DEBUGP("Lbad %02X %04X\n", *lp,
- spec_len);
- return 0;
- }
- DEBUGP("Lok ");
- optlen = *lp + 2;
- } else {
- DEBUGP("Pad1\n");
- optlen = 1;
- }
-
- /* Step to the next */
- DEBUGP("len%04X \n", optlen);
-
- if ((ptr > skb->len - optlen || hdrlen < optlen) &&
- (temp < optinfo->optsnr - 1)) {
- DEBUGP("new pointer is too large! \n");
- break;
- }
- ptr += optlen;
- hdrlen -= optlen;
- }
- if (temp == optinfo->optsnr)
- return ret;
- else
- return 0;
- }
-
- return 0;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
- const void *info,
- const struct xt_match *match,
- void *matchinfo,
- unsigned int matchinfosize,
- unsigned int hook_mask)
-{
- const struct ip6t_opts *optsinfo = matchinfo;
-
- if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
- DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
- return 0;
- }
- return 1;
-}
-
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
- .name = "hbh",
-#else
- .name = "dst",
-#endif
- .match = match,
- .matchsize = sizeof(struct ip6t_opts),
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init ip6t_dst_init(void)
-{
- return ip6t_register_match(&opts_match);
-}
-
-static void __exit ip6t_dst_fini(void)
-{
- ip6t_unregister_match(&opts_match);
-}
-
-module_init(ip6t_dst_init);
-module_exit(ip6t_dst_fini);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 06768c84bd31..78d9c8b9e28a 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -119,7 +119,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ip6t_frag *fraginfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 374f1be85c0d..d32a205e3af2 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -19,15 +19,10 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_ipv6/ip6t_opts.h>
-#define HOPBYHOP 1
-
MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
+MODULE_DESCRIPTION("IPv6 opts match");
MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+MODULE_ALIAS("ip6t_dst");
#if 0
#define DEBUGP printk
@@ -71,11 +66,7 @@ match(const struct sk_buff *skb,
u8 _optlen, *lp = NULL;
unsigned int optlen;
-#if HOPBYHOP
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
+ if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0)
return 0;
oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
@@ -182,7 +173,6 @@ checkentry(const char *tablename,
const void *entry,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ip6t_opts *optsinfo = matchinfo;
@@ -194,26 +184,35 @@ checkentry(const char *tablename,
return 1;
}
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
- .name = "hbh",
-#else
- .name = "dst",
-#endif
- .match = match,
- .matchsize = sizeof(struct ip6t_opts),
- .checkentry = checkentry,
- .me = THIS_MODULE,
+static struct xt_match opts_match[] = {
+ {
+ .name = "hbh",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct ip6t_opts),
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+ .data = NEXTHDR_HOP,
+ },
+ {
+ .name = "dst",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct ip6t_opts),
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+ .data = NEXTHDR_DEST,
+ },
};
static int __init ip6t_hbh_init(void)
{
- return ip6t_register_match(&opts_match);
+ return xt_register_matches(opts_match, ARRAY_SIZE(opts_match));
}
static void __exit ip6t_hbh_fini(void)
{
- ip6t_unregister_match(&opts_match);
+ xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match));
}
module_init(ip6t_hbh_init);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 9375eeb1369f..3093c398002f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ip6t_ipv6header_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 5d047990cd44..4eb9bbc4ebc3 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -57,7 +57,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ip6t_owner_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index fbb0184a41d8..bcb2e168a5bc 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -197,7 +197,6 @@ checkentry(const char *tablename,
const void *entry,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ip6t_rt *rtinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 60976c0c58e8..2fc07c74decf 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ip6t_do_table(pskb, hook, in, out, &packet_filter);
}
static unsigned int
@@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook,
}
#endif
- return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ip6t_do_table(pskb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 03a13eab1dae..386ea260e767 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
}
static unsigned int
@@ -174,18 +174,14 @@ ip6t_local_hook(unsigned int hook,
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
- ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
if (ret != NF_DROP && ret != NF_STOLEN
&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
|| memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
|| (*pskb)->nfmark != nfmark
- || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
-
- /* something which could affect routing has changed */
-
- DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
- }
+ || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+ return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 61a7c58e99f8..b4154da575c0 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+ return ip6t_do_table(pskb, hook, in, out, &packet_raw);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 2a71c3b669f1..e5e53fff9e38 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -20,7 +20,6 @@
* structures.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
@@ -336,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = {
/* From nf_conntrack_proto_icmpv6.c */
extern unsigned int nf_ct_icmpv6_timeout;
-/* From nf_conntrack_frag6.c */
+/* From nf_conntrack_reasm.c */
extern unsigned int nf_ct_frag6_timeout;
extern unsigned int nf_ct_frag6_low_thresh;
extern unsigned int nf_ct_frag6_high_thresh;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ef18a7b7014b..34d447208ffd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -33,7 +33,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
-unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c32a029e43f0..bf93c1ea6be9 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -14,7 +14,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -55,9 +54,9 @@
#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
-unsigned int nf_ct_frag6_high_thresh = 256*1024;
-unsigned int nf_ct_frag6_low_thresh = 192*1024;
-unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
+unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
+unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
struct nf_ct_frag6_skb_cb
{
@@ -409,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
return -1;
}
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_sub(skb->csum,
csum_partial(skb->nh.raw,
(u8*)(fhdr + 1) - skb->nh.raw,
@@ -641,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_HW)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &nf_ct_frag6_mem);
@@ -653,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->nh.ipv6h->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_HW)
+ if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
fq->fragments = NULL;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 779ddf77f4d4..efee7a6301a8 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -17,7 +17,6 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/sched.h>
#include <linux/socket.h>
#include <linux/net.h>
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fa1ce0ae123e..d09329ca3267 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,6 +50,9 @@
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/tcp_states.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
#include <net/rawv6.h>
#include <net/xfrm.h>
@@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
while (sk) {
+ int filtered;
+
delivered = 1;
- if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+ switch (nexthdr) {
+ case IPPROTO_ICMPV6:
+ filtered = icmpv6_filter(sk, skb);
+ break;
+#ifdef CONFIG_IPV6_MIP6
+ case IPPROTO_MH:
+ /* XXX: To validate MH only once for each packet,
+ * this is placed here. It should be after checking
+ * xfrm policy, however it doesn't. The checking xfrm
+ * policy is placed in rawv6_rcv() because it is
+ * required for each socket.
+ */
+ filtered = mip6_mh_filter(sk, skb);
+ break;
+#endif
+ default:
+ filtered = 0;
+ break;
+ }
+
+ if (filtered < 0)
+ break;
+ if (filtered == 0) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
@@ -334,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!rp->checksum)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
skb_postpull_rcsum(skb, skb->nh.raw,
skb->h.raw - skb->nh.raw);
if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -411,6 +438,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
/* Copy the address. */
if (sin6) {
sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = 0;
ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
@@ -581,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
struct iovec *iov;
u8 __user *type = NULL;
u8 __user *code = NULL;
+#ifdef CONFIG_IPV6_MIP6
+ u8 len = 0;
+#endif
int probed = 0;
int i;
@@ -612,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
probed = 1;
}
break;
+#ifdef CONFIG_IPV6_MIP6
+ case IPPROTO_MH:
+ if (iov->iov_base && iov->iov_len < 1)
+ break;
+ /* check if type field is readable or not. */
+ if (iov->iov_len > 2 - len) {
+ u8 __user *p = iov->iov_base;
+ get_user(fl->fl_mh_type, &p[2 - len]);
+ probed = 1;
+ } else
+ len += iov->iov_len;
+
+ break;
+#endif
default:
probed = 1;
break;
@@ -758,6 +803,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
fl.oif = np->mcast_oif;
+ security_sk_classify_flow(sk, &fl);
err = ip6_dst_lookup(sk, &dst, &fl);
if (err)
@@ -780,7 +826,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
}
if (tclass < 0) {
- tclass = np->cork.tclass;
+ tclass = np->tclass;
if (tclass < 0)
tclass = 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index eef985e010ea..f39bbedd1327 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -28,7 +28,6 @@
* YOSHIFUJI,H. @USAGI Always remove fragment header to
* calculate ICV correctly.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -54,10 +53,10 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
+int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
struct ip6frag_skb_cb
{
@@ -153,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
}
static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
static void ip6_frag_secret_rebuild(unsigned long dummy)
{
@@ -434,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return;
}
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_sub(skb->csum,
csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
@@ -648,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_HW)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &ip6_frag_mem);
@@ -663,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
*skb_in = head;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_HW)
+ if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8a777932786d..d6b4b4f48d18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -22,10 +22,11 @@
* routers in REACHABLE, STALE, DELAY or PROBE states).
* - always select the same router if it is (probably)
* reachable. otherwise, round-robin the list.
+ * Ville Nuorvala
+ * Fixed routing subtrees.
*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
@@ -36,7 +37,6 @@
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
-#include <linux/netlink.h>
#include <linux/if_arp.h>
#ifdef CONFIG_PROC_FS
@@ -54,6 +54,8 @@
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
#include <asm/uaccess.h>
@@ -74,9 +76,6 @@
#define CLONE_OFFLINK_ROUTE 0
-#define RT6_SELECT_F_IFACE 0x1
-#define RT6_SELECT_F_REACHABLE 0x2
-
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
@@ -140,15 +139,49 @@ struct rt6_info ip6_null_entry = {
.rt6i_ref = ATOMIC_INIT(1),
};
-struct fib6_node ip6_routing_table = {
- .leaf = &ip6_null_entry,
- .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-/* Protects all the ip6 fib */
+struct rt6_info ip6_prohibit_entry = {
+ .u = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+ .dev = &loopback_dev,
+ .obsolete = -1,
+ .error = -EACCES,
+ .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+ .input = ip6_pkt_discard,
+ .output = ip6_pkt_discard_out,
+ .ops = &ip6_dst_ops,
+ .path = (struct dst_entry*)&ip6_prohibit_entry,
+ }
+ },
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .rt6i_metric = ~(u32) 0,
+ .rt6i_ref = ATOMIC_INIT(1),
+};
-DEFINE_RWLOCK(rt6_lock);
+struct rt6_info ip6_blk_hole_entry = {
+ .u = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+ .dev = &loopback_dev,
+ .obsolete = -1,
+ .error = -EINVAL,
+ .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+ .input = ip6_pkt_discard,
+ .output = ip6_pkt_discard_out,
+ .ops = &ip6_dst_ops,
+ .path = (struct dst_entry*)&ip6_blk_hole_entry,
+ }
+ },
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .rt6i_metric = ~(u32) 0,
+ .rt6i_ref = ATOMIC_INIT(1),
+};
+#endif
/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
@@ -188,8 +221,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
time_after(jiffies, rt->rt6i_expires));
}
+static inline int rt6_need_strict(struct in6_addr *daddr)
+{
+ return (ipv6_addr_type(daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+}
+
/*
- * Route lookup. Any rt6_lock is implied.
+ * Route lookup. Any table->tb6_lock is implied.
*/
static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -298,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
int m, n;
m = rt6_check_dev(rt, oif);
- if (!m && (strict & RT6_SELECT_F_IFACE))
+ if (!m && (strict & RT6_LOOKUP_F_IFACE))
return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
@@ -306,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
n = rt6_check_neigh(rt);
if (n > 1)
m |= 16;
- else if (!n && strict & RT6_SELECT_F_REACHABLE)
+ else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
return -1;
return m;
}
@@ -346,10 +385,10 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
}
if (!match &&
- (strict & RT6_SELECT_F_REACHABLE) &&
+ (strict & RT6_LOOKUP_F_REACHABLE) &&
last && last != rt0) {
/* no entries matched; do round-robin */
- static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(lock);
spin_lock(&lock);
*head = rt0->u.next;
rt0->u.next = last->u.next;
@@ -417,7 +456,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
if (rt && !lifetime) {
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
rt = NULL;
}
@@ -441,44 +480,95 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
#endif
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
- int oif, int strict)
+#define BACKTRACK(saddr) \
+do { \
+ if (rt == &ip6_null_entry) { \
+ struct fib6_node *pn; \
+ while (fn) { \
+ if (fn->fn_flags & RTN_TL_ROOT) \
+ goto out; \
+ pn = fn->parent; \
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+ fn = fib6_lookup(pn->subtree, NULL, saddr); \
+ else \
+ fn = pn; \
+ if (fn->fn_flags & RTN_RTINFO) \
+ goto restart; \
+ } \
+ } \
+} while(0)
+
+static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
+ struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt;
- read_lock_bh(&rt6_lock);
- fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
- rt = rt6_device_match(fn->leaf, oif, strict);
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+restart:
+ rt = fn->leaf;
+ rt = rt6_device_match(rt, fl->oif, flags);
+ BACKTRACK(&fl->fl6_src);
+out:
dst_hold(&rt->u.dst);
- rt->u.dst.__use++;
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
rt->u.dst.lastuse = jiffies;
- if (rt->u.dst.error == 0)
- return rt;
- dst_release(&rt->u.dst);
+ rt->u.dst.__use++;
+
+ return rt;
+
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
+ int oif, int strict)
+{
+ struct flowi fl = {
+ .oif = oif,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *daddr,
+ /* TODO: saddr */
+ },
+ },
+ };
+ struct dst_entry *dst;
+ int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
+
+ dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
+ if (dst->error == 0)
+ return (struct rt6_info *) dst;
+
+ dst_release(dst);
+
return NULL;
}
-/* ip6_ins_rt is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
be destroyed.
*/
-int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
- void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
int err;
+ struct fib6_table *table;
- write_lock_bh(&rt6_lock);
- err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
- write_unlock_bh(&rt6_lock);
+ table = rt->rt6i_table;
+ write_lock_bh(&table->tb6_lock);
+ err = fib6_add(&table->tb6_root, rt, info);
+ write_unlock_bh(&table->tb6_lock);
return err;
}
+int ip6_ins_rt(struct rt6_info *rt)
+{
+ return __ip6_ins_rt(rt, NULL);
+}
+
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
struct in6_addr *saddr)
{
@@ -532,51 +622,39 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
return rt;
}
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
- while ((fn = fn->parent) != NULL) { \
- if (fn->fn_flags & RTN_ROOT) { \
- goto out; \
- } \
- if (fn->fn_flags & RTN_RTINFO) \
- goto restart; \
- } \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+ struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
- int strict;
+ int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = RT6_LOOKUP_F_REACHABLE;
- strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
- read_lock_bh(&rt6_lock);
+ read_lock_bh(&table->tb6_lock);
restart_2:
- fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
- BACKTRACK();
+ rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+ nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
else {
#if CLONE_OFFLINK_ROUTE
- nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+ nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
goto out2;
#endif
@@ -587,7 +665,7 @@ restart:
dst_hold(&rt->u.dst);
if (nrt) {
- err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+ err = ip6_ins_rt(nrt);
if (!err)
goto out2;
}
@@ -596,7 +674,7 @@ restart:
goto out2;
/*
- * Race condition! In the gap, when rt6_lock was
+ * Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
@@ -608,40 +686,63 @@ out:
goto restart_2;
}
dst_hold(&rt->u.dst);
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
- skb->dst = (struct dst_entry *) rt;
- return;
+
+ return rt;
}
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+void ip6_route_input(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct flowi fl = {
+ .iif = skb->dev->ifindex,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ .fwmark = skb->nfmark,
+#endif
+ .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+ },
+ },
+ .proto = iph->nexthdr,
+ };
+ int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
+
+ skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+ struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
- int strict;
+ int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = RT6_LOOKUP_F_REACHABLE;
- strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
- read_lock_bh(&rt6_lock);
+ read_lock_bh(&table->tb6_lock);
restart_2:
- fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
- BACKTRACK();
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -658,7 +759,7 @@ restart:
dst_hold(&rt->u.dst);
if (nrt) {
- err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+ err = ip6_ins_rt(nrt);
if (!err)
goto out2;
}
@@ -667,7 +768,7 @@ restart:
goto out2;
/*
- * Race condition! In the gap, when rt6_lock was
+ * Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
@@ -679,11 +780,21 @@ out:
goto restart_2;
}
dst_hold(&rt->u.dst);
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
- return &rt->u.dst;
+ return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+ int flags = 0;
+
+ if (rt6_need_strict(&fl->fl6_dst))
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
@@ -709,7 +820,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE)
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
else
dst_release(dst);
}
@@ -743,11 +854,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
}
dst->metrics[RTAX_MTU-1] = mtu;
+ call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
-/* Protected by rt6_lock. */
-static struct dst_entry *ndisc_dst_gc_list;
static int ipv6_get_mtu(struct net_device *dev);
static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -768,6 +878,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
return mtu;
}
+static struct dst_entry *ndisc_dst_gc_list;
+static DEFINE_SPINLOCK(ndisc_lock);
+
struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
struct neighbour *neigh,
struct in6_addr *addr,
@@ -808,10 +921,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
rt->rt6i_dst.plen = 128;
#endif
- write_lock_bh(&rt6_lock);
+ spin_lock_bh(&ndisc_lock);
rt->u.dst.next = ndisc_dst_gc_list;
ndisc_dst_gc_list = &rt->u.dst;
- write_unlock_bh(&rt6_lock);
+ spin_unlock_bh(&ndisc_lock);
fib6_force_start_gc();
@@ -825,8 +938,11 @@ int ndisc_dst_gc(int *more)
int freed;
next = NULL;
+ freed = 0;
+
+ spin_lock_bh(&ndisc_lock);
pprev = &ndisc_dst_gc_list;
- freed = 0;
+
while ((dst = *pprev) != NULL) {
if (!atomic_read(&dst->__refcnt)) {
*pprev = dst->next;
@@ -838,6 +954,8 @@ int ndisc_dst_gc(int *more)
}
}
+ spin_unlock_bh(&ndisc_lock);
+
return freed;
}
@@ -898,28 +1016,24 @@ int ipv6_get_hoplimit(struct net_device *dev)
*
*/
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
- void *_rtattr, struct netlink_skb_parms *req)
+int ip6_route_add(struct fib6_config *cfg)
{
int err;
- struct rtmsg *r;
- struct rtattr **rta;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
+ struct fib6_table *table;
int addr_type;
- rta = (struct rtattr **) _rtattr;
-
- if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+ if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
- if (rtmsg->rtmsg_src_len)
+ if (cfg->fc_src_len)
return -EINVAL;
#endif
- if (rtmsg->rtmsg_ifindex) {
+ if (cfg->fc_ifindex) {
err = -ENODEV;
- dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
+ dev = dev_get_by_index(cfg->fc_ifindex);
if (!dev)
goto out;
idev = in6_dev_get(dev);
@@ -927,8 +1041,14 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
goto out;
}
- if (rtmsg->rtmsg_metric == 0)
- rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_metric == 0)
+ cfg->fc_metric = IP6_RT_PRIO_USER;
+
+ table = fib6_new_table(cfg->fc_table);
+ if (table == NULL) {
+ err = -ENOBUFS;
+ goto out;
+ }
rt = ip6_dst_alloc();
@@ -938,14 +1058,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
}
rt->u.dst.obsolete = -1;
- rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
- if (nlh && (r = NLMSG_DATA(nlh))) {
- rt->rt6i_protocol = r->rtm_protocol;
- } else {
- rt->rt6i_protocol = RTPROT_BOOT;
- }
+ rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
- addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+ if (cfg->fc_protocol == RTPROT_UNSPEC)
+ cfg->fc_protocol = RTPROT_BOOT;
+ rt->rt6i_protocol = cfg->fc_protocol;
+
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
if (addr_type & IPV6_ADDR_MULTICAST)
rt->u.dst.input = ip6_mc_input;
@@ -954,24 +1073,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
rt->u.dst.output = ip6_output;
- ipv6_addr_prefix(&rt->rt6i_dst.addr,
- &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
- rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
+ ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+ rt->rt6i_dst.plen = cfg->fc_dst_len;
if (rt->rt6i_dst.plen == 128)
rt->u.dst.flags = DST_HOST;
#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_prefix(&rt->rt6i_src.addr,
- &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
- rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+ ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+ rt->rt6i_src.plen = cfg->fc_src_len;
#endif
- rt->rt6i_metric = rtmsg->rtmsg_metric;
+ rt->rt6i_metric = cfg->fc_metric;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
*/
- if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
+ if ((cfg->fc_flags & RTF_REJECT) ||
(dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != &loopback_dev) {
@@ -994,12 +1111,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
goto install_route;
}
- if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
+ if (cfg->fc_flags & RTF_GATEWAY) {
struct in6_addr *gw_addr;
int gwa_type;
- gw_addr = &rtmsg->rtmsg_gateway;
- ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+ gw_addr = &cfg->fc_gateway;
+ ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
gwa_type = ipv6_addr_type(gw_addr);
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1016,7 +1133,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
if (!(gwa_type&IPV6_ADDR_UNICAST))
goto out;
- grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+ grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
if (grt == NULL)
@@ -1048,7 +1165,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
if (dev == NULL)
goto out;
- if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+ if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
if (IS_ERR(rt->rt6i_nexthop)) {
err = PTR_ERR(rt->rt6i_nexthop);
@@ -1057,24 +1174,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
}
}
- rt->rt6i_flags = rtmsg->rtmsg_flags;
+ rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (rta && rta[RTA_METRICS-1]) {
- int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
- struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
-
- while (RTA_OK(attr, attrlen)) {
- unsigned flavor = attr->rta_type;
- if (flavor) {
- if (flavor > RTAX_MAX) {
+ if (cfg->fc_mx) {
+ struct nlattr *nla;
+ int remaining;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla->nla_type;
+
+ if (type) {
+ if (type > RTAX_MAX) {
err = -EINVAL;
goto out;
}
- rt->u.dst.metrics[flavor-1] =
- *(u32 *)RTA_DATA(attr);
+
+ rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
}
- attr = RTA_NEXT(attr, attrlen);
}
}
@@ -1086,7 +1203,8 @@ install_route:
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
rt->u.dst.dev = dev;
rt->rt6i_idev = idev;
- return ip6_ins_rt(rt, nlh, _rtattr, req);
+ rt->rt6i_table = table;
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
out:
if (dev)
@@ -1098,51 +1216,65 @@ out:
return err;
}
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
int err;
+ struct fib6_table *table;
- write_lock_bh(&rt6_lock);
+ if (rt == &ip6_null_entry)
+ return -ENOENT;
- err = fib6_del(rt, nlh, _rtattr, req);
+ table = rt->rt6i_table;
+ write_lock_bh(&table->tb6_lock);
+
+ err = fib6_del(rt, info);
dst_release(&rt->u.dst);
- write_unlock_bh(&rt6_lock);
+ write_unlock_bh(&table->tb6_lock);
return err;
}
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int ip6_del_rt(struct rt6_info *rt)
{
+ return __ip6_del_rt(rt, NULL);
+}
+
+static int ip6_route_del(struct fib6_config *cfg)
+{
+ struct fib6_table *table;
struct fib6_node *fn;
struct rt6_info *rt;
int err = -ESRCH;
- read_lock_bh(&rt6_lock);
+ table = fib6_get_table(cfg->fc_table);
+ if (table == NULL)
+ return err;
- fn = fib6_locate(&ip6_routing_table,
- &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
- &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+ read_lock_bh(&table->tb6_lock);
+
+ fn = fib6_locate(&table->tb6_root,
+ &cfg->fc_dst, cfg->fc_dst_len,
+ &cfg->fc_src, cfg->fc_src_len);
if (fn) {
for (rt = fn->leaf; rt; rt = rt->u.next) {
- if (rtmsg->rtmsg_ifindex &&
+ if (cfg->fc_ifindex &&
(rt->rt6i_dev == NULL ||
- rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+ rt->rt6i_dev->ifindex != cfg->fc_ifindex))
continue;
- if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
- !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+ if (cfg->fc_flags & RTF_GATEWAY &&
+ !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
continue;
- if (rtmsg->rtmsg_metric &&
- rtmsg->rtmsg_metric != rt->rt6i_metric)
+ if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
continue;
dst_hold(&rt->u.dst);
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
- return ip6_del_rt(rt, nlh, _rtattr, req);
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
}
}
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
return err;
}
@@ -1150,11 +1282,17 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
/*
* Handle redirects
*/
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi {
+ struct flowi fl;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+ struct flowi *fl,
+ int flags)
{
- struct rt6_info *rt, *nrt = NULL;
- int strict;
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+ struct rt6_info *rt;
struct fib6_node *fn;
/*
@@ -1167,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
* is a bit fuzzy and one might need to check all possible
* routes.
*/
- strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
- read_lock_bh(&rt6_lock);
- fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
for (rt = fn->leaf; rt; rt = rt->u.next) {
/*
@@ -1185,29 +1322,60 @@ restart:
continue;
if (!(rt->rt6i_flags & RTF_GATEWAY))
continue;
- if (neigh->dev != rt->rt6i_dev)
+ if (fl->oif != rt->rt6i_dev->ifindex)
continue;
- if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
continue;
break;
}
- if (rt)
- dst_hold(&rt->u.dst);
- else if (strict) {
- while ((fn = fn->parent) != NULL) {
- if (fn->fn_flags & RTN_ROOT)
- break;
- if (fn->fn_flags & RTN_RTINFO)
- goto restart;
- }
- }
- read_unlock_bh(&rt6_lock);
- if (!rt) {
+ if (!rt)
+ rt = &ip6_null_entry;
+ BACKTRACK(&fl->fl6_src);
+out:
+ dst_hold(&rt->u.dst);
+
+ read_unlock_bh(&table->tb6_lock);
+
+ return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+ struct in6_addr *src,
+ struct in6_addr *gateway,
+ struct net_device *dev)
+{
+ struct ip6rd_flowi rdfl = {
+ .fl = {
+ .oif = dev->ifindex,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *dest,
+ .saddr = *src,
+ },
+ },
+ },
+ .gateway = *gateway,
+ };
+ int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
+
+ return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+ struct in6_addr *saddr,
+ struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+ struct rt6_info *rt, *nrt = NULL;
+ struct netevent_redirect netevent;
+
+ rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+ if (rt == &ip6_null_entry) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
"for redirect target\n");
- return;
+ goto out;
}
/*
@@ -1250,11 +1418,15 @@ restart:
nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
- if (ip6_ins_rt(nrt, NULL, NULL, NULL))
+ if (ip6_ins_rt(nrt))
goto out;
+ netevent.old = &rt->u.dst;
+ netevent.new = &nrt->u.dst;
+ call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
+
if (rt->rt6i_flags&RTF_CACHE) {
- ip6_del_rt(rt, NULL, NULL, NULL);
+ ip6_del_rt(rt);
return;
}
@@ -1336,7 +1508,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
- ip6_ins_rt(nrt, NULL, NULL, NULL);
+ ip6_ins_rt(nrt);
}
out:
dst_release(&rt->u.dst);
@@ -1372,6 +1544,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
+ rt->rt6i_table = ort->rt6i_table;
}
return rt;
}
@@ -1382,9 +1555,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
{
struct fib6_node *fn;
struct rt6_info *rt = NULL;
+ struct fib6_table *table;
- write_lock_bh(&rt6_lock);
- fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+ table = fib6_get_table(RT6_TABLE_INFO);
+ if (table == NULL)
+ return NULL;
+
+ write_lock_bh(&table->tb6_lock);
+ fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
if (!fn)
goto out;
@@ -1399,7 +1577,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
break;
}
out:
- write_unlock_bh(&rt6_lock);
+ write_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -1407,21 +1585,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
struct in6_addr *gwaddr, int ifindex,
unsigned pref)
{
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_INFO,
+ .fc_metric = 1024,
+ .fc_ifindex = ifindex,
+ .fc_dst_len = prefixlen,
+ .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+ RTF_UP | RTF_PREF(pref),
+ };
+
+ ipv6_addr_copy(&cfg.fc_dst, prefix);
+ ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
- memset(&rtmsg, 0, sizeof(rtmsg));
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
- ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
- rtmsg.rtmsg_dst_len = prefixlen;
- ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
- rtmsg.rtmsg_metric = 1024;
- rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
/* We should treat it as a default route if prefix length is 0. */
if (!prefixlen)
- rtmsg.rtmsg_flags |= RTF_DEFAULT;
- rtmsg.rtmsg_ifindex = ifindex;
+ cfg.fc_flags |= RTF_DEFAULT;
- ip6_route_add(&rtmsg, NULL, NULL, NULL);
+ ip6_route_add(&cfg);
return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
}
@@ -1430,12 +1610,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
struct rt6_info *rt;
- struct fib6_node *fn;
+ struct fib6_table *table;
- fn = &ip6_routing_table;
+ table = fib6_get_table(RT6_TABLE_DFLT);
+ if (table == NULL)
+ return NULL;
- write_lock_bh(&rt6_lock);
- for (rt = fn->leaf; rt; rt=rt->u.next) {
+ write_lock_bh(&table->tb6_lock);
+ for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
if (dev == rt->rt6i_dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1443,7 +1625,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
}
if (rt)
dst_hold(&rt->u.dst);
- write_unlock_bh(&rt6_lock);
+ write_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -1451,43 +1633,65 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref)
{
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_DFLT,
+ .fc_metric = 1024,
+ .fc_ifindex = dev->ifindex,
+ .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+ RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ };
- memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
- ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
- rtmsg.rtmsg_metric = 1024;
- rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
- RTF_PREF(pref);
+ ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
- rtmsg.rtmsg_ifindex = dev->ifindex;
+ ip6_route_add(&cfg);
- ip6_route_add(&rtmsg, NULL, NULL, NULL);
return rt6_get_dflt_router(gwaddr, dev);
}
void rt6_purge_dflt_routers(void)
{
struct rt6_info *rt;
+ struct fib6_table *table;
+
+ /* NOTE: Keep consistent with rt6_get_dflt_router */
+ table = fib6_get_table(RT6_TABLE_DFLT);
+ if (table == NULL)
+ return;
restart:
- read_lock_bh(&rt6_lock);
- for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+ read_lock_bh(&table->tb6_lock);
+ for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
dst_hold(&rt->u.dst);
-
- read_unlock_bh(&rt6_lock);
-
- ip6_del_rt(rt, NULL, NULL, NULL);
-
+ read_unlock_bh(&table->tb6_lock);
+ ip6_del_rt(rt);
goto restart;
}
}
- read_unlock_bh(&rt6_lock);
+ read_unlock_bh(&table->tb6_lock);
+}
+
+static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
+ struct fib6_config *cfg)
+{
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+ cfg->fc_metric = rtmsg->rtmsg_metric;
+ cfg->fc_expires = rtmsg->rtmsg_info;
+ cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+ cfg->fc_src_len = rtmsg->rtmsg_src_len;
+ cfg->fc_flags = rtmsg->rtmsg_flags;
+
+ ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
+ ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
+ ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
{
+ struct fib6_config cfg;
struct in6_rtmsg rtmsg;
int err;
@@ -1500,14 +1704,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
sizeof(struct in6_rtmsg));
if (err)
return -EFAULT;
-
+
+ rtmsg_to_fib6_config(&rtmsg, &cfg);
+
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+ err = ip6_route_add(&cfg);
break;
case SIOCDELRT:
- err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+ err = ip6_route_del(&cfg);
break;
default:
err = -EINVAL;
@@ -1526,6 +1732,10 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
static int ip6_pkt_discard(struct sk_buff *skb)
{
+ int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
+ if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
+ IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
+
IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
kfree_skb(skb);
@@ -1577,6 +1787,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
+ rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
atomic_set(&rt->u.dst.__refcnt, 1);
@@ -1595,9 +1806,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
void rt6_ifdown(struct net_device *dev)
{
- write_lock_bh(&rt6_lock);
- fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
- write_unlock_bh(&rt6_lock);
+ fib6_clean_all(fib6_ifdown, 0, dev);
}
struct rt6_mtu_change_arg
@@ -1647,80 +1856,114 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
- struct rt6_mtu_change_arg arg;
+ struct rt6_mtu_change_arg arg = {
+ .dev = dev,
+ .mtu = mtu,
+ };
- arg.dev = dev;
- arg.mtu = mtu;
- read_lock_bh(&rt6_lock);
- fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
- read_unlock_bh(&rt6_lock);
+ fib6_clean_all(rt6_mtu_change_route, 0, &arg);
}
-static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
- struct in6_rtmsg *rtmsg)
+static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
+ [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
+ [RTA_OIF] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_PRIORITY] = { .type = NLA_U32 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct fib6_config *cfg)
{
- memset(rtmsg, 0, sizeof(*rtmsg));
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
+ int err;
- rtmsg->rtmsg_dst_len = r->rtm_dst_len;
- rtmsg->rtmsg_src_len = r->rtm_src_len;
- rtmsg->rtmsg_flags = RTF_UP;
- if (r->rtm_type == RTN_UNREACHABLE)
- rtmsg->rtmsg_flags |= RTF_REJECT;
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
- if (rta[RTA_GATEWAY-1]) {
- if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
- return -EINVAL;
- memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
- rtmsg->rtmsg_flags |= RTF_GATEWAY;
- }
- if (rta[RTA_DST-1]) {
- if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
- return -EINVAL;
- memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+ err = -EINVAL;
+ rtm = nlmsg_data(nlh);
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->fc_table = rtm->rtm_table;
+ cfg->fc_dst_len = rtm->rtm_dst_len;
+ cfg->fc_src_len = rtm->rtm_src_len;
+ cfg->fc_flags = RTF_UP;
+ cfg->fc_protocol = rtm->rtm_protocol;
+
+ if (rtm->rtm_type == RTN_UNREACHABLE)
+ cfg->fc_flags |= RTF_REJECT;
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
+
+ if (tb[RTA_GATEWAY]) {
+ nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+ cfg->fc_flags |= RTF_GATEWAY;
}
- if (rta[RTA_SRC-1]) {
- if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
- return -EINVAL;
- memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+
+ if (tb[RTA_DST]) {
+ int plen = (rtm->rtm_dst_len + 7) >> 3;
+
+ if (nla_len(tb[RTA_DST]) < plen)
+ goto errout;
+
+ nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
}
- if (rta[RTA_OIF-1]) {
- if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
- return -EINVAL;
- memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+ if (tb[RTA_SRC]) {
+ int plen = (rtm->rtm_src_len + 7) >> 3;
+
+ if (nla_len(tb[RTA_SRC]) < plen)
+ goto errout;
+
+ nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
}
- if (rta[RTA_PRIORITY-1]) {
- if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
- return -EINVAL;
- memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+
+ if (tb[RTA_OIF])
+ cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ if (tb[RTA_PRIORITY])
+ cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+ if (tb[RTA_METRICS]) {
+ cfg->fc_mx = nla_data(tb[RTA_METRICS]);
+ cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
}
- return 0;
+
+ if (tb[RTA_TABLE])
+ cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+
+ err = 0;
+errout:
+ return err;
}
int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtmsg *r = NLMSG_DATA(nlh);
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg;
+ int err;
- if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
- return -EINVAL;
- return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+
+ return ip6_route_del(&cfg);
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtmsg *r = NLMSG_DATA(nlh);
- struct in6_rtmsg rtmsg;
+ struct fib6_config cfg;
+ int err;
- if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
- return -EINVAL;
- return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
-}
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
-struct rt6_rtnl_dump_arg
-{
- struct sk_buff *skb;
- struct netlink_callback *cb;
-};
+ return ip6_route_add(&cfg);
+}
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
@@ -1728,9 +1971,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
int prefix, unsigned int flags)
{
struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ struct nlmsghdr *nlh;
struct rta_cacheinfo ci;
+ u32 table;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1739,13 +1982,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
}
}
- nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET6;
rtm->rtm_dst_len = rt->rt6i_dst.plen;
rtm->rtm_src_len = rt->rt6i_src.plen;
rtm->rtm_tos = 0;
- rtm->rtm_table = RT_TABLE_MAIN;
+ if (rt->rt6i_table)
+ table = rt->rt6i_table->tb6_id;
+ else
+ table = RT6_TABLE_UNSPEC;
+ rtm->rtm_table = table;
+ NLA_PUT_U32(skb, RTA_TABLE, table);
if (rt->rt6i_flags&RTF_REJECT)
rtm->rtm_type = RTN_UNREACHABLE;
else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1766,31 +2017,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
rtm->rtm_flags |= RTM_F_CLONED;
if (dst) {
- RTA_PUT(skb, RTA_DST, 16, dst);
+ NLA_PUT(skb, RTA_DST, 16, dst);
rtm->rtm_dst_len = 128;
} else if (rtm->rtm_dst_len)
- RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+ NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
if (src) {
- RTA_PUT(skb, RTA_SRC, 16, src);
+ NLA_PUT(skb, RTA_SRC, 16, src);
rtm->rtm_src_len = 128;
} else if (rtm->rtm_src_len)
- RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+ NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
if (iif)
- RTA_PUT(skb, RTA_IIF, 4, &iif);
+ NLA_PUT_U32(skb, RTA_IIF, iif);
else if (dst) {
struct in6_addr saddr_buf;
if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
- RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
}
+
if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
+
if (rt->u.dst.neighbour)
- RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+ NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+
if (rt->u.dst.dev)
- RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
- RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+ NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+ NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
if (rt->rt6i_expires)
ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
@@ -1802,23 +2057,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
ci.rta_id = 0;
ci.rta_ts = 0;
ci.rta_tsage = 0;
- RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
int prefix;
- if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
- struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+ if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+ struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
} else
prefix = 0;
@@ -1828,189 +2081,108 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix, NLM_F_MULTI);
}
-static int fib6_dump_node(struct fib6_walker_t *w)
+int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- int res;
+ struct nlattr *tb[RTA_MAX+1];
struct rt6_info *rt;
+ struct sk_buff *skb;
+ struct rtmsg *rtm;
+ struct flowi fl;
+ int err, iif = 0;
- for (rt = w->leaf; rt; rt = rt->u.next) {
- res = rt6_dump_route(rt, w->args);
- if (res < 0) {
- /* Frame is full, suspend walking */
- w->leaf = rt;
- return 1;
- }
- BUG_TRAP(res!=0);
- }
- w->leaf = NULL;
- return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
- struct fib6_walker_t *w = (void*)cb->args[0];
-
- if (w) {
- cb->args[0] = 0;
- fib6_walker_unlink(w);
- kfree(w);
- }
- cb->done = (void*)cb->args[1];
- cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
- fib6_dump_end(cb);
- return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct rt6_rtnl_dump_arg arg;
- struct fib6_walker_t *w;
- int res;
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
- arg.skb = skb;
- arg.cb = cb;
+ err = -EINVAL;
+ memset(&fl, 0, sizeof(fl));
- w = (void*)cb->args[0];
- if (w == NULL) {
- /* New dump:
- *
- * 1. hook callback destructor.
- */
- cb->args[1] = (long)cb->done;
- cb->done = fib6_dump_done;
+ if (tb[RTA_SRC]) {
+ if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+ goto errout;
- /*
- * 2. allocate and initialize walker.
- */
- w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (w == NULL)
- return -ENOMEM;
- RT6_TRACE("dump<%p", w);
- w->root = &ip6_routing_table;
- w->func = fib6_dump_node;
- w->args = &arg;
- cb->args[0] = (long)w;
- read_lock_bh(&rt6_lock);
- res = fib6_walk(w);
- read_unlock_bh(&rt6_lock);
- } else {
- w->args = &arg;
- read_lock_bh(&rt6_lock);
- res = fib6_walk_continue(w);
- read_unlock_bh(&rt6_lock);
+ ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
}
-#if RT6_DEBUG >= 3
- if (res <= 0 && skb->len == 0)
- RT6_TRACE("%p>dump end\n", w);
-#endif
- res = res < 0 ? res : skb->len;
- /* res < 0 is an error. (really, impossible)
- res == 0 means that dump is complete, but skb still can contain data.
- res > 0 dump is not complete, but frame is full.
- */
- /* Destroy walker, if dump of this table is complete. */
- if (res <= 0)
- fib6_dump_end(cb);
- return res;
-}
-
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
-{
- struct rtattr **rta = arg;
- int iif = 0;
- int err = -ENOBUFS;
- struct sk_buff *skb;
- struct flowi fl;
- struct rt6_info *rt;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (skb == NULL)
- goto out;
+ if (tb[RTA_DST]) {
+ if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+ goto errout;
- /* Reserve room for dummy headers, this skb can pass
- through good chunk of routing engine.
- */
- skb->mac.raw = skb->data;
- skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+ }
- memset(&fl, 0, sizeof(fl));
- if (rta[RTA_SRC-1])
- ipv6_addr_copy(&fl.fl6_src,
- (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
- if (rta[RTA_DST-1])
- ipv6_addr_copy(&fl.fl6_dst,
- (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
+ if (tb[RTA_IIF])
+ iif = nla_get_u32(tb[RTA_IIF]);
- if (rta[RTA_IIF-1])
- memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+ if (tb[RTA_OIF])
+ fl.oif = nla_get_u32(tb[RTA_OIF]);
if (iif) {
struct net_device *dev;
dev = __dev_get_by_index(iif);
if (!dev) {
err = -ENODEV;
- goto out_free;
+ goto errout;
}
}
- fl.oif = 0;
- if (rta[RTA_OIF-1])
- memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
- rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+ /* Reserve room for dummy headers, this skb can pass
+ through good chunk of routing engine.
+ */
+ skb->mac.raw = skb->data;
+ skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
skb->dst = &rt->u.dst;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
- err = rt6_fill_node(skb, rt,
- &fl.fl6_dst, &fl.fl6_src,
- iif,
+ err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, 0);
if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
+ kfree_skb(skb);
+ goto errout;
}
- err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
- if (err > 0)
- err = 0;
-out:
+ err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
return err;
-out_free:
- kfree_skb(skb);
- goto out;
}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
- struct netlink_skb_parms *req)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
- u32 pid = current->pid;
- u32 seq = 0;
-
- if (req)
- pid = req->pid;
- if (nlh)
- seq = nlh->nlmsg_seq;
-
- skb = alloc_skb(size, gfp_any());
- if (!skb) {
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
- return;
+ u32 pid = 0, seq = 0;
+ struct nlmsghdr *nlh = NULL;
+ int payload = sizeof(struct rtmsg) + 256;
+ int err = -ENOBUFS;
+
+ if (info) {
+ pid = info->pid;
+ nlh = info->nlh;
+ if (nlh)
+ seq = nlh->nlmsg_seq;
}
- if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
+
+ skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+ if (skb == NULL)
+ goto errout;
+
+ err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+ err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
}
/*
@@ -2086,16 +2258,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
- struct rt6_proc_arg arg;
- arg.buffer = buffer;
- arg.offset = offset;
- arg.length = length;
- arg.skip = 0;
- arg.len = 0;
+ struct rt6_proc_arg arg = {
+ .buffer = buffer,
+ .offset = offset,
+ .length = length,
+ };
- read_lock_bh(&rt6_lock);
- fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
- read_unlock_bh(&rt6_lock);
+ fib6_clean_all(rt6_info_route, 0, &arg);
*start = buffer;
if (offset)
@@ -2250,13 +2419,9 @@ void __init ip6_route_init(void)
{
struct proc_dir_entry *p;
- ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
- sizeof(struct rt6_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!ip6_dst_ops.kmem_cachep)
- panic("cannot create ip6_dst_cache");
-
+ ip6_dst_ops.kmem_cachep =
+ kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
fib6_init();
#ifdef CONFIG_PROC_FS
p = proc_net_create("ipv6_route", 0, rt6_proc_info);
@@ -2268,10 +2433,16 @@ void __init ip6_route_init(void)
#ifdef CONFIG_XFRM
xfrm6_init();
#endif
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ fib6_rules_init();
+#endif
}
void ip6_route_cleanup(void)
{
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ fib6_rules_cleanup();
+#endif
#ifdef CONFIG_PROC_FS
proc_net_remove("ipv6_route");
proc_net_remove("rt6_stats");
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6578c3080f47..836eecd7e62b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -18,7 +18,6 @@
* Nate Thompson <nate@thebog.net>: 6to4 support
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
@@ -381,7 +380,6 @@ static int ipip6_rcv(struct sk_buff *skb)
secpath_reset(skb);
skb->mac.raw = skb->nh.raw;
skb->nh.raw = skb->data;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 8eff9fa1e983..7a4639db1346 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -7,7 +7,6 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
-#include <linux/config.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <net/ndisc.h>
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a50eb306e9e2..3b6575478fcc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -26,7 +26,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -252,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
final_p = &final;
}
+ security_sk_classify_flow(sk, &fl);
+
err = ip6_dst_lookup(sk, &dst, &fl);
if (err)
goto failure;
@@ -270,9 +271,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
ipv6_addr_copy(&np->saddr, saddr);
inet->rcv_saddr = LOOPBACK4_IPV6;
- ip6_dst_store(sk, dst, NULL);
- sk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ sk->sk_gso_type = SKB_GSO_TCPV6;
+ __ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -376,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet->dport;
fl.fl_ip_sport = inet->sport;
+ security_skb_classify_flow(skb, &fl);
if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
sk->sk_err_soft = -err;
@@ -429,7 +430,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case TCP_SYN_RECV: /* Cannot happen.
It can, it SYNs are crossed. --ANK */
if (!sock_owned_by_user(sk)) {
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -470,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
fl.oif = treq->iif;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_req_classify_flow(req, &fl);
if (dst == NULL) {
opt = np->opt;
@@ -544,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcphdr *th = skb->h.th;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
skb->csum = offsetof(struct tcphdr, check);
} else {
@@ -554,6 +555,24 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
}
}
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+ struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ ipv6h = skb->nh.ipv6h;
+ th = skb->h.th;
+
+ th->check = 0;
+ th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_TCP, 0);
+ skb->csum = offsetof(struct tcphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
static void tcp_v6_send_reset(struct sk_buff *skb)
{
@@ -610,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
fl.oif = inet6_iif(skb);
fl.fl_ip_dport = t1->dest;
fl.fl_ip_sport = t1->source;
+ security_skb_classify_flow(skb, &fl);
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
@@ -676,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
fl.oif = inet6_iif(skb);
fl.fl_ip_dport = t1->dest;
fl.fl_ip_sport = t1->source;
+ security_skb_classify_flow(skb, &fl);
if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
@@ -805,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_rsk(req)->snt_isn = isn;
+ security_inet_conn_request(sk, skb, req);
+
if (tcp_v6_send_synack(sk, req, NULL))
goto drop;
@@ -815,7 +838,6 @@ drop:
if (req)
reqsk_free(req);
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
return 0; /* don't send reset */
}
@@ -909,6 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_req_classify_flow(req, &fl);
if (ip6_dst_lookup(sk, &dst, &fl))
goto out;
@@ -930,9 +953,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* comment in that function for the gory details. -acme
*/
- ip6_dst_store(newsk, dst, NULL);
- newsk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ newsk->sk_gso_type = SKB_GSO_TCPV6;
+ __ip6_dst_store(newsk, dst, NULL, NULL);
newtcp6sk = (struct tcp6_sock *)newsk;
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1011,7 +1033,7 @@ out:
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
&skb->nh.ipv6h->daddr,skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1053,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard;
/*
@@ -1210,12 +1232,12 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
@@ -1469,7 +1491,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
sp->sk_state,
- tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+ tp->write_seq-tp->snd_una,
+ (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
timer_active,
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1605,6 +1628,8 @@ struct proto tcpv6_prot = {
static struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
+ .gso_send_check = tcp_v6_gso_send_check,
+ .gso_segment = tcp_tso_segment,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8d3432a70f3a..9662561701d1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -23,7 +23,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -62,81 +61,9 @@
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- */
-static int udp_v6_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
- struct sock *sk2;
- struct hlist_node *node;
-
- write_lock_bh(&udp_hash_lock);
- if (snum == 0) {
- int best_size_so_far, best, result, i;
-
- if (udp_port_rover > sysctl_local_port_range[1] ||
- udp_port_rover < sysctl_local_port_range[0])
- udp_port_rover = sysctl_local_port_range[0];
- best_size_so_far = 32767;
- best = result = udp_port_rover;
- for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
- int size;
- struct hlist_head *list;
-
- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
- if (hlist_empty(list)) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0] +
- ((result - sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
- goto gotit;
- }
- size = 0;
- sk_for_each(sk2, node, list)
- if (++size >= best_size_so_far)
- goto next;
- best_size_so_far = size;
- best = result;
- next:;
- }
- result = best;
- for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
- if (!udp_lport_inuse(result))
- break;
- }
- if (i >= (1 << 16) / UDP_HTABLE_SIZE)
- goto fail;
-gotit:
- udp_port_rover = snum = result;
- } else {
- sk_for_each(sk2, node,
- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
- if (inet_sk(sk2)->num == snum &&
- sk2 != sk &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- ipv6_rcv_saddr_equal(sk, sk2))
- goto fail;
- }
- }
-
- inet_sk(sk)->num = snum;
- if (sk_unhashed(sk)) {
- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
- sock_prot_inc_use(sk->sk_prot);
- }
- write_unlock_bh(&udp_hash_lock);
- return 0;
-
-fail:
- write_unlock_bh(&udp_hash_lock);
- return 1;
+ return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
}
static void udp_v6_hash(struct sock *sk)
@@ -346,6 +273,8 @@ out:
static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
+ int rc;
+
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
kfree_skb(skb);
return -1;
@@ -357,7 +286,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
return 0;
}
- if (sock_queue_rcv_skb(sk,skb)<0) {
+ if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+ /* Note that an ENOMEM error is charged twice */
+ if (rc == -ENOMEM)
+ UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
return 0;
@@ -476,7 +408,7 @@ static int udpv6_rcv(struct sk_buff **pskb)
uh = skb->h.uh;
}
- if (skb->ip_summed == CHECKSUM_HW &&
+ if (skb->ip_summed == CHECKSUM_COMPLETE &&
!csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -783,7 +715,9 @@ do_udp_sendmsg:
connected = 0;
}
- err = ip6_dst_lookup(sk, &dst, fl);
+ security_sk_classify_flow(sk, fl);
+
+ err = ip6_sk_dst_lookup(sk, &dst, fl);
if (err)
goto out;
if (final_p)
@@ -841,7 +775,12 @@ do_append_data:
if (connected) {
ip6_dst_store(sk, dst,
ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
- &np->daddr : NULL);
+ &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
} else {
dst_release(dst);
}
@@ -856,6 +795,16 @@ out:
UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
return len;
}
+ /*
+ * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
+ * ENOBUFS might not be good (it's not tunable per se), but otherwise
+ * we don't have a good statistic (IpOutDiscards but it can be too many
+ * things). We could add another new stat but at least for now that
+ * seems like overkill.
+ */
+ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+ }
return err;
do_confirm:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0405d74ff910..5c8b7a568800 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,10 +16,10 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
{
int err;
- u32 seq;
+ __be32 seq;
struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
@@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
if (x->mode->input(x, skb))
goto drop;
- if (x->props.mode) { /* XXX */
+ if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
decaps = 1;
break;
}
@@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb)
{
return xfrm6_rcv_spi(*pskb, 0);
}
+
+int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+ xfrm_address_t *saddr, u8 proto)
+{
+ struct xfrm_state *x = NULL;
+ int wildcard = 0;
+ struct in6_addr any;
+ xfrm_address_t *xany;
+ struct xfrm_state *xfrm_vec_one = NULL;
+ int nh = 0;
+ int i = 0;
+
+ ipv6_addr_set(&any, 0, 0, 0, 0);
+ xany = (xfrm_address_t *)&any;
+
+ for (i = 0; i < 3; i++) {
+ xfrm_address_t *dst, *src;
+ switch (i) {
+ case 0:
+ dst = daddr;
+ src = saddr;
+ break;
+ case 1:
+ /* lookup state with wild-card source address */
+ wildcard = 1;
+ dst = daddr;
+ src = xany;
+ break;
+ case 2:
+ default:
+ /* lookup state with wild-card addresses */
+ wildcard = 1; /* XXX */
+ dst = xany;
+ src = xany;
+ break;
+ }
+
+ x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6);
+ if (!x)
+ continue;
+
+ spin_lock(&x->lock);
+
+ if (wildcard) {
+ if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) {
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+ }
+
+ if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+ if (xfrm_state_check_expire(x)) {
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
+ nh = x->type->input(x, skb);
+ if (nh <= 0) {
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
+ x->curlft.bytes += skb->len;
+ x->curlft.packets++;
+
+ spin_unlock(&x->lock);
+
+ xfrm_vec_one = x;
+ break;
+ }
+
+ if (!xfrm_vec_one)
+ goto drop;
+
+ /* Allocate new secpath or COW existing one. */
+ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+ struct sec_path *sp;
+ sp = secpath_dup(skb->sp);
+ if (!sp)
+ goto drop;
+ if (skb->sp)
+ secpath_put(skb->sp);
+ skb->sp = sp;
+ }
+
+ if (1 + skb->sp->len > XFRM_MAX_DEPTH)
+ goto drop;
+
+ skb->sp->xvec[skb->sp->len] = xfrm_vec_one;
+ skb->sp->len ++;
+
+ return 1;
+drop:
+ if (xfrm_vec_one)
+ xfrm_state_put(xfrm_vec_one);
+ return -1;
+}
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 000000000000..6031c16d46ca
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,93 @@
+/*
+ * xfrm6_mode_ro.c - Route optimization mode for IPv6.
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * Authors:
+ * Noriaki TAKAMIYA @USAGI
+ * Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header. The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ u8 *prevhdr;
+ int hdr_len;
+
+ skb_push(skb, x->props.header_len);
+ iph = skb->nh.ipv6h;
+
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ skb->nh.raw = prevhdr - x->props.header_len;
+ skb->h.raw = skb->data + hdr_len;
+ memmove(skb->data, iph, hdr_len);
+ return 0;
+}
+
+/*
+ * Do nothing about routing optimization header unlike IPsec.
+ */
+static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static struct xfrm_mode xfrm6_ro_mode = {
+ .input = xfrm6_ro_input,
+ .output = xfrm6_ro_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_ROUTEOPTIMIZATION,
+};
+
+static int __init xfrm6_ro_init(void)
+{
+ return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
+}
+
+static void __exit xfrm6_ro_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
+ BUG_ON(err);
+}
+
+module_init(xfrm6_ro_init);
+module_exit(xfrm6_ro_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 711d713e36d8..3a4b39b12bad 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -25,9 +25,8 @@
* its absence, that of the top IP header. The value of skb->data will always
* point to the top IP header.
*/
-static int xfrm6_transport_output(struct sk_buff *skb)
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_state *x = skb->dst->xfrm;
struct ipv6hdr *iph;
u8 *prevhdr;
int hdr_len;
@@ -35,7 +34,7 @@ static int xfrm6_transport_output(struct sk_buff *skb)
skb_push(skb, x->props.header_len);
iph = skb->nh.ipv6h;
- hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
skb->nh.raw = prevhdr - x->props.header_len;
skb->h.raw = skb->data + hdr_len;
memmove(skb->data, iph, hdr_len);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8af79be2edca..5e7d8a7d6414 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
* its absence, that of the top IP header. The value of skb->data will always
* point to the top IP header.
*/
-static int xfrm6_tunnel_output(struct sk_buff *skb)
+static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct xfrm_state *x = dst->xfrm;
struct ipv6hdr *iph, *top_iph;
int dsfield;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 48fccb1eca08..c260ea104c52 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,12 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
+int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **prevhdr)
+{
+ return ip6_find_1stfragopt(skb, prevhdr);
+}
+
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
@@ -41,13 +47,13 @@ static int xfrm6_output_one(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int err;
- if (skb->ip_summed == CHECKSUM_HW) {
- err = skb_checksum_help(skb, 0);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ err = skb_checksum_help(skb);
if (err)
goto error_nolock;
}
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = xfrm6_tunnel_check_size(skb);
if (err)
goto error_nolock;
@@ -59,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
if (err)
goto error;
- err = x->mode->output(skb);
+ err = x->mode->output(x, skb);
if (err)
goto error;
@@ -69,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb)
x->curlft.bytes += skb->len;
x->curlft.packets++;
+ if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
+ x->lastused = (u64)xtime.tv_sec;
spin_unlock_bh(&x->lock);
@@ -80,7 +88,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
}
dst = skb->dst;
x = dst->xfrm;
- } while (x && !x->props.mode);
+ } while (x && (x->props.mode != XFRM_MODE_TUNNEL));
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
err = 0;
@@ -122,10 +130,10 @@ static int xfrm6_output_finish(struct sk_buff *skb)
{
struct sk_buff *segs;
- if (!skb_shinfo(skb)->gso_size)
+ if (!skb_is_gso(skb))
return xfrm6_output_finish2(skb);
- skb->protocol = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IPV6);
segs = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (unlikely(IS_ERR(segs)))
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ee715f2691e9..6a252e2134d1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -12,13 +12,15 @@
*/
#include <linux/compiler.h>
-#include <linux/config.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
static struct dst_ops xfrm6_dst_ops;
static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
@@ -32,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
return err;
}
+static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+ struct rt6_info *rt;
+ struct flowi fl_tunnel = {
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *(struct in6_addr *)&daddr->a6,
+ },
+ },
+ };
+
+ if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+ ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
+ (struct in6_addr *)&saddr->a6);
+ dst_release(&rt->u.dst);
+ return 0;
+ }
+ return -EHOSTUNREACH;
+}
+
static struct dst_entry *
__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
{
@@ -51,7 +73,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
xdst->u.rt6.rt6i_src.plen);
if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
- xfrm_bundle_ok(xdst, fl, AF_INET6)) {
+ xfrm_bundle_ok(xdst, fl, AF_INET6,
+ (xdst->u.rt6.rt6i_dst.plen != 128 ||
+ xdst->u.rt6.rt6i_src.plen != 128))) {
dst_clone(dst);
break;
}
@@ -60,6 +84,40 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
return dst;
}
+static inline struct in6_addr*
+__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
+{
+ return (x->type->remote_addr) ?
+ (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
+ (struct in6_addr*)&x->id.daddr;
+}
+
+static inline struct in6_addr*
+__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
+{
+ return (x->type->local_addr) ?
+ (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) :
+ (struct in6_addr*)&x->props.saddr;
+}
+
+static inline void
+__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
+{
+ if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
+ *nflen += x->props.header_len;
+ else
+ *len += x->props.header_len;
+}
+
+static inline void
+__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
+{
+ if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
+ *nflen -= x->props.header_len;
+ else
+ *len -= x->props.header_len;
+}
+
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
* all the metrics... Shortly, bundle a bundle.
*/
@@ -84,6 +142,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
int i;
int err = 0;
int header_len = 0;
+ int nfheader_len = 0;
int trailer_len = 0;
dst = dst_prev = NULL;
@@ -110,17 +169,18 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
xdst = (struct xfrm_dst *)dst1;
xdst->route = &rt->u.dst;
+ xdst->genid = xfrm[i]->genid;
if (rt->rt6i_node)
xdst->route_cookie = rt->rt6i_node->fn_sernum;
dst1->next = dst_prev;
dst_prev = dst1;
- if (xfrm[i]->props.mode) {
- remote = (struct in6_addr*)&xfrm[i]->id.daddr;
- local = (struct in6_addr*)&xfrm[i]->props.saddr;
+ if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+ remote = __xfrm6_bundle_addr_remote(xfrm[i], remote);
+ local = __xfrm6_bundle_addr_local(xfrm[i], local);
tunnel = 1;
}
- header_len += xfrm[i]->props.header_len;
+ __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
trailer_len += xfrm[i]->props.trailer_len;
if (tunnel) {
@@ -155,6 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
dst_prev->flags |= DST_HOST;
dst_prev->lastuse = jiffies;
dst_prev->header_len = header_len;
+ dst_prev->nfheader_len = nfheader_len;
dst_prev->trailer_len = trailer_len;
memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
@@ -173,7 +234,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
x->u.rt6.rt6i_src = rt0->rt6i_src;
x->u.rt6.rt6i_idev = rt0->rt6i_idev;
in6_dev_hold(rt0->rt6i_idev);
- header_len -= x->u.dst.xfrm->props.header_len;
+ __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
trailer_len -= x->u.dst.xfrm->props.trailer_len;
}
@@ -233,6 +294,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
fl->proto = nexthdr;
return;
+#ifdef CONFIG_IPV6_MIP6
+ case IPPROTO_MH:
+ if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+ struct ip6_mh *mh;
+ mh = (struct ip6_mh *)exthdr;
+
+ fl->fl_mh_type = mh->ip6mh_type;
+ }
+ fl->proto = nexthdr;
+ return;
+#endif
+
/* XXX Why are there these headers? */
case IPPROTO_AH:
case IPPROTO_ESP:
@@ -309,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
.dst_ops = &xfrm6_dst_ops,
.dst_lookup = xfrm6_dst_lookup,
+ .get_saddr = xfrm6_get_saddr,
.find_bundle = __xfrm6_find_bundle,
.bundle_create = __xfrm6_bundle_create,
.decode_session = _decode_session6,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b33296b3f6de..9ddaa9d41539 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -29,9 +29,9 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
x->sel.dport = xfrm_flowi_dport(fl);
- x->sel.dport_mask = ~0;
+ x->sel.dport_mask = htons(0xffff);
x->sel.sport = xfrm_flowi_sport(fl);
- x->sel.sport_mask = ~0;
+ x->sel.sport_mask = htons(0xffff);
x->sel.prefixlen_d = 128;
x->sel.prefixlen_s = 128;
x->sel.proto = fl->proto;
@@ -42,102 +42,135 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
- if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
- struct rt6_info *rt;
- struct flowi fl_tunnel = {
- .nl_u = {
- .ip6_u = {
- .daddr = *(struct in6_addr *)daddr,
- }
- }
- };
- if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
- &fl_tunnel, AF_INET6)) {
- ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr,
- (struct in6_addr *)&x->props.saddr);
- dst_release(&rt->u.dst);
- }
- }
x->props.mode = tmpl->mode;
x->props.reqid = tmpl->reqid;
x->props.family = AF_INET6;
}
-static struct xfrm_state *
-__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
{
- unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
- struct xfrm_state *x;
-
- list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
- if (x->props.family == AF_INET6 &&
- spi == x->id.spi &&
- ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
- proto == x->id.proto) {
- xfrm_state_hold(x);
- return x;
+ int i;
+ int j = 0;
+
+ /* Rule 1: select IPsec transport except AH */
+ for (i = 0; i < n; i++) {
+ if (src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+ src[i]->id.proto != IPPROTO_AH) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+ if (j == n)
+ goto end;
+
+ /* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+ src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+ if (j == n)
+ goto end;
+#endif
+
+ /* Rule 3: select IPsec transport AH */
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+ src[i]->id.proto == IPPROTO_AH) {
+ dst[j++] = src[i];
+ src[i] = NULL;
}
}
- return NULL;
+ if (j == n)
+ goto end;
+
+ /* Rule 4: select IPsec tunnel */
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ src[i]->props.mode == XFRM_MODE_TUNNEL) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+ if (likely(j == n))
+ goto end;
+
+ /* Final rule */
+ for (i = 0; i < n; i++) {
+ if (src[i]) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+
+ end:
+ return 0;
}
-static struct xfrm_state *
-__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
- xfrm_address_t *daddr, xfrm_address_t *saddr,
- int create)
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
{
- struct xfrm_state *x, *x0;
- unsigned h = __xfrm6_dst_hash(daddr);
-
- x0 = NULL;
-
- list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
- if (x->props.family == AF_INET6 &&
- ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
- mode == x->props.mode &&
- proto == x->id.proto &&
- ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
- reqid == x->props.reqid &&
- x->km.state == XFRM_STATE_ACQ &&
- !x->id.spi) {
- x0 = x;
- break;
- }
+ int i;
+ int j = 0;
+
+ /* Rule 1: select IPsec transport */
+ for (i = 0; i < n; i++) {
+ if (src[i]->mode == XFRM_MODE_TRANSPORT) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
}
- if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
- ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
- (struct in6_addr *)daddr);
- ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
- (struct in6_addr *)saddr);
- x0->sel.prefixlen_d = 128;
- x0->sel.prefixlen_s = 128;
- ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
- (struct in6_addr *)saddr);
- x0->km.state = XFRM_STATE_ACQ;
- ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
- (struct in6_addr *)daddr);
- x0->id.proto = proto;
- x0->props.family = AF_INET6;
- x0->props.mode = mode;
- x0->props.reqid = reqid;
- x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
- xfrm_state_hold(x0);
- x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
- add_timer(&x0->timer);
- xfrm_state_hold(x0);
- list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
- wake_up(&km_waitq);
+ if (j == n)
+ goto end;
+
+ /* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+ src[i]->mode == XFRM_MODE_IN_TRIGGER)) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
}
- if (x0)
- xfrm_state_hold(x0);
- return x0;
+ if (j == n)
+ goto end;
+#endif
+
+ /* Rule 3: select IPsec tunnel */
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ src[i]->mode == XFRM_MODE_TUNNEL) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+ if (likely(j == n))
+ goto end;
+
+ /* Final rule */
+ for (i = 0; i < n; i++) {
+ if (src[i]) {
+ dst[j++] = src[i];
+ src[i] = NULL;
+ }
+ }
+
+ end:
+ return 0;
}
static struct xfrm_state_afinfo xfrm6_state_afinfo = {
.family = AF_INET6,
.init_tempsel = __xfrm6_init_tempsel,
- .state_lookup = __xfrm6_state_lookup,
- .find_acq = __xfrm6_find_acq,
+ .tmpl_sort = __xfrm6_tmpl_sort,
+ .state_sort = __xfrm6_state_sort,
};
void __init xfrm6_state_init(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index d37768e5064f..7af227bb1551 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -21,7 +21,6 @@
* Based on net/ipv4/xfrm4_tunnel.c
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/xfrm.h>
#include <linux/list.h>
@@ -32,27 +31,6 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
-# define X6TDEBUG 3
-#else
-# define X6TDEBUG 1
-#endif
-
-#define X6TPRINTK(fmt, args...) printk(fmt, ## args)
-#define X6TNOPRINTK(fmt, args...) do { ; } while(0)
-
-#if X6TDEBUG >= 1
-# define X6TPRINTK1 X6TPRINTK
-#else
-# define X6TPRINTK1 X6TNOPRINTK
-#endif
-
-#if X6TDEBUG >= 3
-# define X6TPRINTK3 X6TPRINTK
-#else
-# define X6TPRINTK3 X6TNOPRINTK
-#endif
-
/*
* xfrm_tunnel_spi things are for allocating unique id ("spi")
* per xfrm_address_t.
@@ -63,15 +41,8 @@ struct xfrm6_tunnel_spi {
xfrm_address_t addr;
u32 spi;
atomic_t refcnt;
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
- u32 magic;
-#endif
};
-#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
-# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef
-#endif
-
static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
static u32 xfrm6_tunnel_spi;
@@ -87,43 +58,15 @@ static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
-static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
- const char *name)
-{
- if (unlikely(x6spi->magic != XFRM6_TUNNEL_SPI_MAGIC)) {
- X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
- "at %p has corrupted magic %08x "
- "(should be %08x)\n",
- name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC);
- return -1;
- }
- return 0;
-}
-#else
-static int inline x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
- const char *name)
-{
- return 0;
-}
-#endif
-
-#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__)
-
-
static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
{
unsigned h;
- X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr);
-
h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
h ^= h >> 16;
h ^= h >> 8;
h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
- X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h);
-
return h;
}
@@ -137,19 +80,13 @@ static int xfrm6_tunnel_spi_init(void)
{
int i;
- X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
xfrm6_tunnel_spi = 0;
xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
sizeof(struct xfrm6_tunnel_spi),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
- if (!xfrm6_tunnel_spi_kmem) {
- X6TPRINTK1(KERN_ERR
- "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n",
- __FUNCTION__);
+ if (!xfrm6_tunnel_spi_kmem)
return -ENOMEM;
- }
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]);
@@ -162,22 +99,16 @@ static void xfrm6_tunnel_spi_fini(void)
{
int i;
- X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) {
if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i]))
- goto err;
+ return;
}
for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) {
if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
- goto err;
+ return;
}
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
xfrm6_tunnel_spi_kmem = NULL;
- return;
-err:
- X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__);
- return;
}
static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
@@ -185,19 +116,13 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
struct hlist_node *pos;
- X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
hlist_for_each_entry(x6spi, pos,
&xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr) {
- if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
- X6SPI_CHECK_MAGIC(x6spi);
- X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi);
+ if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
return x6spi;
- }
}
- X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__);
return NULL;
}
@@ -206,8 +131,6 @@ u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
- X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
read_lock_bh(&xfrm6_tunnel_spi_lock);
x6spi = __xfrm6_tunnel_spi_lookup(saddr);
spi = x6spi ? x6spi->spi : 0;
@@ -224,8 +147,6 @@ static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
struct hlist_node *pos;
unsigned index;
- X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN;
@@ -259,18 +180,10 @@ try_next_2:;
spi = 0;
goto out;
alloc_spi:
- X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for " NIP6_FMT "\n",
- __FUNCTION__,
- NIP6(*(struct in6_addr *)saddr));
x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
- if (!x6spi) {
- X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n",
- __FUNCTION__);
+ if (!x6spi)
goto out;
- }
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
- x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC;
-#endif
+
memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
x6spi->spi = spi;
atomic_set(&x6spi->refcnt, 1);
@@ -279,9 +192,7 @@ alloc_spi:
index = xfrm6_tunnel_spi_hash_byaddr(saddr);
hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
- X6SPI_CHECK_MAGIC(x6spi);
out:
- X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
return spi;
}
@@ -290,8 +201,6 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
- X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
write_lock_bh(&xfrm6_tunnel_spi_lock);
x6spi = __xfrm6_tunnel_spi_lookup(saddr);
if (x6spi) {
@@ -301,8 +210,6 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
spi = __xfrm6_tunnel_alloc_spi(saddr);
write_unlock_bh(&xfrm6_tunnel_spi_lock);
- X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
-
return spi;
}
@@ -313,8 +220,6 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
struct hlist_node *pos, *n;
- X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
write_lock_bh(&xfrm6_tunnel_spi_lock);
hlist_for_each_entry_safe(x6spi, pos, n,
@@ -322,12 +227,6 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
list_byaddr)
{
if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
- X6TPRINTK3(KERN_DEBUG "%s(): x6spi object for " NIP6_FMT
- " found at %p\n",
- __FUNCTION__,
- NIP6(*(struct in6_addr *)saddr),
- x6spi);
- X6SPI_CHECK_MAGIC(x6spi);
if (atomic_dec_and_test(&x6spi->refcnt)) {
hlist_del(&x6spi->list_byaddr);
hlist_del(&x6spi->list_byspi);
@@ -359,7 +258,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_rcv(struct sk_buff *skb)
{
struct ipv6hdr *iph = skb->nh.ipv6h;
- u32 spi;
+ __be32 spi;
spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
return xfrm6_rcv_spi(skb, spi);
@@ -378,20 +277,14 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case ICMPV6_ADDR_UNREACH:
case ICMPV6_PORT_UNREACH:
default:
- X6TPRINTK3(KERN_DEBUG
- "xfrm6_tunnel: Destination Unreach.\n");
break;
}
break;
case ICMPV6_PKT_TOOBIG:
- X6TPRINTK3(KERN_DEBUG
- "xfrm6_tunnel: Packet Too Big.\n");
break;
case ICMPV6_TIME_EXCEED:
switch (code) {
case ICMPV6_EXC_HOPLIMIT:
- X6TPRINTK3(KERN_DEBUG
- "xfrm6_tunnel: Too small Hoplimit.\n");
break;
case ICMPV6_EXC_FRAGTIME:
default:
@@ -414,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int xfrm6_tunnel_init_state(struct xfrm_state *x)
{
- if (!x->props.mode)
+ if (x->props.mode != XFRM_MODE_TUNNEL)
return -EINVAL;
if (x->encap)
@@ -448,22 +341,14 @@ static struct xfrm6_tunnel xfrm6_tunnel_handler = {
static int __init xfrm6_tunnel_init(void)
{
- X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
- if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
- X6TPRINTK1(KERN_ERR
- "xfrm6_tunnel init: can't add xfrm type\n");
+ if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0)
return -EAGAIN;
- }
+
if (xfrm6_tunnel_register(&xfrm6_tunnel_handler)) {
- X6TPRINTK1(KERN_ERR
- "xfrm6_tunnel init(): can't add handler\n");
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
return -EAGAIN;
}
if (xfrm6_tunnel_spi_init() < 0) {
- X6TPRINTK1(KERN_ERR
- "xfrm6_tunnel init: failed to initialize spi\n");
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
return -EAGAIN;
@@ -473,15 +358,9 @@ static int __init xfrm6_tunnel_init(void)
static void __exit xfrm6_tunnel_fini(void)
{
- X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
xfrm6_tunnel_spi_fini();
- if (xfrm6_tunnel_deregister(&xfrm6_tunnel_handler))
- X6TPRINTK1(KERN_ERR
- "xfrm6_tunnel close: can't remove handler\n");
- if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0)
- X6TPRINTK1(KERN_ERR
- "xfrm6_tunnel close: can't remove xfrm type\n");
+ xfrm6_tunnel_deregister(&xfrm6_tunnel_handler);
+ xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
}
module_init(xfrm6_tunnel_init);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 811d998725bc..bef3f61569f7 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -28,7 +28,6 @@
* See net/ipx/ChangeLog.
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
@@ -1643,13 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto out;
- ipx = ipx_hdr(skb);
- ipx_pktsize = ntohs(ipx->ipx_pktsize);
+ if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
+ goto drop;
+
+ ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize);
/* Too small or invalid header? */
- if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len)
+ if (ipx_pktsize < sizeof(struct ipxhdr) ||
+ !pskb_may_pull(skb, ipx_pktsize))
goto drop;
+ ipx = ipx_hdr(skb);
if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
goto drop;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 1f73d9ea434d..4c0c71206e54 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -4,7 +4,6 @@
* Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002
*/
-#include <linux/config.h>
#include <linux/init.h>
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index bba3431cd9a5..a30dbb1e08fb 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -7,7 +7,6 @@
* See net/ipx/ChangeLog.
*/
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/route.h>
#include <linux/spinlock.h>
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 510eda96d10a..fa574735c76f 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -6,7 +6,6 @@
* Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 2f37c9f35e27..7e1aea89ef05 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -42,7 +42,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -133,13 +132,14 @@ static void irda_disconnect_indication(void *instance, void *sap,
/* Prevent race conditions with irda_release() and irda_shutdown() */
if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
+ lock_sock(sk);
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_shutdown |= SEND_SHUTDOWN;
sk->sk_state_change(sk);
- /* Uh-oh... Should use sock_orphan ? */
- sock_set_flag(sk, SOCK_DEAD);
+ sock_orphan(sk);
+ release_sock(sk);
/* Close our TSAP.
* If we leave it open, IrLMP put it back into the list of
@@ -309,7 +309,8 @@ static void irda_connect_response(struct irda_sock *self)
IRDA_ASSERT(self != NULL, return;);
- skb = dev_alloc_skb(64);
+ skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
+ GFP_ATOMIC);
if (skb == NULL) {
IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
__FUNCTION__);
@@ -1213,6 +1214,7 @@ static int irda_release(struct socket *sock)
if (sk == NULL)
return 0;
+ lock_sock(sk);
sk->sk_state = TCP_CLOSE;
sk->sk_shutdown |= SEND_SHUTDOWN;
sk->sk_state_change(sk);
@@ -1222,6 +1224,7 @@ static int irda_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
+ release_sock(sk);
/* Purge queues (see sock_init_data()) */
skb_queue_purge(&sk->sk_receive_queue);
@@ -1354,6 +1357,7 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
IRDA_ASSERT(self != NULL, return -1;);
+ IRDA_ASSERT(!sock_error(sk), return -1;);
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
@@ -1406,6 +1410,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
IRDA_ASSERT(self != NULL, return -1;);
+ IRDA_ASSERT(!sock_error(sk), return -1;);
if (sock->flags & __SO_ACCEPTCON)
return(-EINVAL);
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 286881978858..ad6b6af3dd97 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -29,7 +29,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
@@ -116,12 +115,10 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
IRDA_ASSERT(ircomm != NULL, return NULL;);
- self = kmalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
if (self == NULL)
return NULL;
- memset(self, 0, sizeof(struct ircomm_cb));
-
self->notify = *notify;
self->magic = IRCOMM_MAGIC;
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
index d9097207aed3..c8e0d89ee11f 100644
--- a/net/irda/ircomm/ircomm_lmp.c
+++ b/net/irda/ircomm/ircomm_lmp.c
@@ -81,7 +81,7 @@ static int ircomm_lmp_connect_response(struct ircomm_cb *self,
/* Any userdata supplied? */
if (userdata == NULL) {
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
@@ -115,7 +115,7 @@ static int ircomm_lmp_disconnect_request(struct ircomm_cb *self,
IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
if (!userdata) {
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 6009bab05091..a39f5735a90b 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -121,7 +121,7 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
skb = self->ctrl_skb;
if (!skb) {
- skb = dev_alloc_skb(256);
+ skb = alloc_skb(256, GFP_ATOMIC);
if (!skb) {
spin_unlock_irqrestore(&self->spinlock, flags);
return -ENOMEM;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 6f20b4206e08..3bcdb467efc5 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -30,7 +30,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
@@ -124,7 +123,6 @@ static int __init ircomm_tty_init(void)
driver->owner = THIS_MODULE;
driver->driver_name = "ircomm";
driver->name = "ircomm";
- driver->devfs_name = "ircomm";
driver->major = IRCOMM_TTY_MAJOR;
driver->minor_start = IRCOMM_TTY_MINOR;
driver->type = TTY_DRIVER_TYPE_SERIAL;
@@ -381,12 +379,11 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
self = hashbin_lock_find(ircomm_tty, line, NULL);
if (!self) {
/* No, so make new instance */
- self = kmalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
+ self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
if (self == NULL) {
IRDA_ERROR("%s(), kmalloc failed!\n", __FUNCTION__);
return -ENOMEM;
}
- memset(self, 0, sizeof(struct ircomm_tty_cb));
self->magic = IRCOMM_TTY_MAGIC;
self->flow = FLOW_STOP;
@@ -761,8 +758,9 @@ static int ircomm_tty_write(struct tty_struct *tty,
}
} else {
/* Prepare a full sized frame */
- skb = dev_alloc_skb(self->max_data_size+
- self->max_header_size);
+ skb = alloc_skb(self->max_data_size+
+ self->max_header_size,
+ GFP_ATOMIC);
if (!skb) {
spin_unlock_irqrestore(&self->spinlock, flags);
return -ENOBUFS;
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e3debbdb67f5..7e7a31798d8d 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -29,7 +29,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/string.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
@@ -402,12 +401,10 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
}
/* Allocate dongle info for this instance */
- dongle = kmalloc(sizeof(dongle_t), GFP_KERNEL);
+ dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
if (!dongle)
goto out;
- memset(dongle, 0, sizeof(dongle_t));
-
/* Bind the registration info to this particular instance */
dongle->issue = reg;
dongle->dev = dev;
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 2d2e2b1919f4..415cf4eec23b 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -24,7 +24,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
@@ -346,10 +345,11 @@ static void iriap_disconnect_request(struct iriap_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (tx_skb == NULL) {
- IRDA_DEBUG(0, "%s(), Could not allocate an sk_buff of length %d\n",
- __FUNCTION__, 64);
+ IRDA_DEBUG(0,
+ "%s(), Could not allocate an sk_buff of length %d\n",
+ __FUNCTION__, LMP_MAX_HEADER);
return;
}
@@ -397,7 +397,7 @@ int iriap_getvaluebyclass_request(struct iriap_cb *self,
attr_len = strlen(attr); /* Up to IAS_MAX_ATTRIBNAME = 60 */
skb_len = self->max_header_size+2+name_len+1+attr_len+4;
- tx_skb = dev_alloc_skb(skb_len);
+ tx_skb = alloc_skb(skb_len, GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
@@ -563,7 +563,8 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self,
* value. We add 32 bytes because of the 6 bytes for the frame and
* max 5 bytes for the value coding.
*/
- tx_skb = dev_alloc_skb(value->len + self->max_header_size + 32);
+ tx_skb = alloc_skb(value->len + self->max_header_size + 32,
+ GFP_ATOMIC);
if (!tx_skb)
return;
@@ -701,7 +702,7 @@ void iriap_send_ack(struct iriap_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC);
if (!tx_skb)
return;
diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c
index a73607450de1..99b18dc7a0b7 100644
--- a/net/irda/iriap_event.c
+++ b/net/irda/iriap_event.c
@@ -365,7 +365,7 @@ static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
switch (event) {
case IAP_LM_CONNECT_INDICATION:
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (tx_skb == NULL) {
IRDA_WARNING("%s: unable to malloc!\n", __FUNCTION__);
return;
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
index 82e665c79991..a154b1d71c0f 100644
--- a/net/irda/irias_object.c
+++ b/net/irda/irias_object.c
@@ -82,13 +82,12 @@ struct ias_object *irias_new_object( char *name, int id)
IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
- obj = kmalloc(sizeof(struct ias_object), GFP_ATOMIC);
+ obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC);
if (obj == NULL) {
IRDA_WARNING("%s(), Unable to allocate object!\n",
__FUNCTION__);
return NULL;
}
- memset(obj, 0, sizeof( struct ias_object));
obj->magic = IAS_OBJECT_MAGIC;
obj->name = strndup(name, IAS_MAX_CLASSNAME);
@@ -346,13 +345,12 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
IRDA_ASSERT(name != NULL, return;);
- attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
+ attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
if (attrib == NULL) {
IRDA_WARNING("%s: Unable to allocate attribute!\n",
__FUNCTION__);
return;
}
- memset(attrib, 0, sizeof( struct ias_attrib));
attrib->magic = IAS_ATTRIB_MAGIC;
attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -382,13 +380,12 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
IRDA_ASSERT(name != NULL, return;);
IRDA_ASSERT(octets != NULL, return;);
- attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
+ attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
if (attrib == NULL) {
IRDA_WARNING("%s: Unable to allocate attribute!\n",
__FUNCTION__);
return;
}
- memset(attrib, 0, sizeof( struct ias_attrib));
attrib->magic = IAS_ATTRIB_MAGIC;
attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -416,13 +413,12 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
IRDA_ASSERT(name != NULL, return;);
IRDA_ASSERT(value != NULL, return;);
- attrib = kmalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
+ attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
if (attrib == NULL) {
IRDA_WARNING("%s: Unable to allocate attribute!\n",
__FUNCTION__);
return;
}
- memset(attrib, 0, sizeof( struct ias_attrib));
attrib->magic = IAS_ATTRIB_MAGIC;
attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -443,12 +439,11 @@ struct ias_value *irias_new_integer_value(int integer)
{
struct ias_value *value;
- value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+ value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
if (value == NULL) {
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
return NULL;
}
- memset(value, 0, sizeof(struct ias_value));
value->type = IAS_INTEGER;
value->len = 4;
@@ -469,12 +464,11 @@ struct ias_value *irias_new_string_value(char *string)
{
struct ias_value *value;
- value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+ value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
if (value == NULL) {
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
return NULL;
}
- memset( value, 0, sizeof( struct ias_value));
value->type = IAS_STRING;
value->charset = CS_ASCII;
@@ -495,12 +489,11 @@ struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
{
struct ias_value *value;
- value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+ value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
if (value == NULL) {
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
return NULL;
}
- memset(value, 0, sizeof(struct ias_value));
value->type = IAS_OCT_SEQ;
/* Check length */
@@ -522,12 +515,11 @@ struct ias_value *irias_new_missing_value(void)
{
struct ias_value *value;
- value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+ value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
if (value == NULL) {
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
return NULL;
}
- memset(value, 0, sizeof(struct ias_value));
value->type = IAS_MISSING;
value->len = 0;
diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
index f8e6cb0db04b..95cf1234ea17 100644
--- a/net/irda/irlan/irlan_client.c
+++ b/net/irda/irlan/irlan_client.c
@@ -173,13 +173,14 @@ void irlan_client_discovery_indication(discinfo_t *discovery,
rcu_read_lock();
self = irlan_get_any();
if (self) {
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+ IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;);
IRDA_DEBUG(1, "%s(), Found instance (%08x)!\n", __FUNCTION__ ,
daddr);
irlan_client_wakeup(self, saddr, daddr);
}
+IRDA_ASSERT_LABEL(out:)
rcu_read_unlock();
}
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index 657d12210578..9b962f247714 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -23,7 +23,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -637,7 +636,8 @@ void irlan_get_provider_info(struct irlan_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(64);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER,
+ GFP_ATOMIC);
if (!skb)
return;
@@ -669,7 +669,10 @@ void irlan_open_data_channel(struct irlan_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(64);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") +
+ IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -705,7 +708,9 @@ void irlan_close_data_channel(struct irlan_cb *self)
if (self->client.tsap_ctrl == NULL)
return;
- skb = dev_alloc_skb(64);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -716,7 +721,7 @@ void irlan_close_data_channel(struct irlan_cb *self)
/* Build frame */
frame[0] = CMD_CLOSE_DATA_CHAN;
- frame[1] = 0x01; /* Two parameters */
+ frame[1] = 0x01; /* One parameter */
irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
@@ -740,7 +745,11 @@ static void irlan_open_unicast_addr(struct irlan_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(128);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -778,7 +787,12 @@ void irlan_set_broadcast_filter(struct irlan_cb *self, int status)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(128);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") +
+ /* We may waste one byte here...*/
+ IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -817,7 +831,12 @@ void irlan_set_multicast_filter(struct irlan_cb *self, int status)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(128);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
+ /* We may waste one byte here...*/
+ IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -857,7 +876,12 @@ static void irlan_get_unicast_addr(struct irlan_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(128);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION",
+ "DYNAMIC"),
+ GFP_ATOMIC);
if (!skb)
return;
@@ -892,7 +916,10 @@ void irlan_get_media_char(struct irlan_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(64);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"),
+ GFP_ATOMIC);
+
if (!skb)
return;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 953e255d2bc8..b0ccc455b747 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -25,7 +25,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
index 39c202d1c374..58efde919667 100644
--- a/net/irda/irlan/irlan_provider.c
+++ b/net/irda/irlan/irlan_provider.c
@@ -296,7 +296,14 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command,
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- skb = dev_alloc_skb(128);
+ skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
+ /* Bigger param length comes from CMD_GET_MEDIA_CHAR */
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BORADCAST") +
+ IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
+ IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"),
+ GFP_ATOMIC);
+
if (!skb)
return;
@@ -354,8 +361,7 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command,
} else
skb->data[1] = 0x02; /* 2 parameters */
irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data);
- irlan_insert_array_param(skb, "RECONNECT_KEY", "LINUX RULES!",
- 12);
+ irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!");
break;
case CMD_FILTER_OPERATION:
irlan_filter_request(self, skb);
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index a16528657b4c..e7852a07495e 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -29,7 +29,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/skbuff.h>
@@ -117,11 +116,10 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
/* Initialize the irlap structure. */
- self = kmalloc(sizeof(struct irlap_cb), GFP_KERNEL);
+ self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL);
if (self == NULL)
return NULL;
- memset(self, 0, sizeof(struct irlap_cb));
self->magic = LAP_MAGIC;
/* Make a binding between the layers */
@@ -883,7 +881,7 @@ static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now)
/* Change speed now, or just piggyback speed on frames */
if (now) {
/* Send down empty frame to trigger speed change */
- skb = dev_alloc_skb(0);
+ skb = alloc_skb(0, GFP_ATOMIC);
if (skb)
irlap_queue_xmit(self, skb);
}
@@ -1223,7 +1221,7 @@ static int irlap_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct irlap_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct irlap_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1239,7 +1237,6 @@ static int irlap_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index a505b5457608..99faff68c399 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -25,7 +25,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/delay.h>
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 3e9a06abbdd0..dba349c832d0 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -117,7 +117,9 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
/* Allocate frame */
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(sizeof(struct snrm_frame) +
+ IRLAP_NEGOCIATION_PARAMS_LEN,
+ GFP_ATOMIC);
if (!tx_skb)
return;
@@ -136,7 +138,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
* If we are establishing a connection then insert QoS paramerters
*/
if (qos) {
- skb_put(tx_skb, 9); /* 21 left */
+ skb_put(tx_skb, 9); /* 25 left */
frame->saddr = cpu_to_le32(self->saddr);
frame->daddr = cpu_to_le32(self->daddr);
@@ -210,7 +212,9 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
/* Allocate frame */
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(sizeof(struct ua_frame) +
+ IRLAP_NEGOCIATION_PARAMS_LEN,
+ GFP_ATOMIC);
if (!tx_skb)
return;
@@ -245,23 +249,23 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
void irlap_send_dm_frame( struct irlap_cb *self)
{
struct sk_buff *tx_skb = NULL;
- __u8 *frame;
+ struct dm_frame *frame;
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- tx_skb = dev_alloc_skb(32);
+ tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC);
if (!tx_skb)
return;
- frame = skb_put(tx_skb, 2);
+ frame = (struct dm_frame *)skb_put(tx_skb, 2);
if (self->state == LAP_NDM)
- frame[0] = CBROADCAST;
+ frame->caddr = CBROADCAST;
else
- frame[0] = self->caddr;
+ frame->caddr = self->caddr;
- frame[1] = DM_RSP | PF_BIT;
+ frame->control = DM_RSP | PF_BIT;
irlap_queue_xmit(self, tx_skb);
}
@@ -275,21 +279,21 @@ void irlap_send_dm_frame( struct irlap_cb *self)
void irlap_send_disc_frame(struct irlap_cb *self)
{
struct sk_buff *tx_skb = NULL;
- __u8 *frame;
+ struct disc_frame *frame;
IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- tx_skb = dev_alloc_skb(16);
+ tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC);
if (!tx_skb)
return;
- frame = skb_put(tx_skb, 2);
+ frame = (struct disc_frame *)skb_put(tx_skb, 2);
- frame[0] = self->caddr | CMD_FRAME;
- frame[1] = DISC_CMD | PF_BIT;
+ frame->caddr = self->caddr | CMD_FRAME;
+ frame->control = DISC_CMD | PF_BIT;
irlap_queue_xmit(self, tx_skb);
}
@@ -315,7 +319,8 @@ void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s,
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
IRDA_ASSERT(discovery != NULL, return;);
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN,
+ GFP_ATOMIC);
if (!tx_skb)
return;
@@ -422,11 +427,10 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
return;
}
- if ((discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
+ if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
IRDA_WARNING("%s: kmalloc failed!\n", __FUNCTION__);
return;
}
- memset(discovery, 0, sizeof(discovery_t));
discovery->data.daddr = info->daddr;
discovery->data.saddr = self->saddr;
@@ -574,18 +578,18 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
void irlap_send_rr_frame(struct irlap_cb *self, int command)
{
struct sk_buff *tx_skb;
- __u8 *frame;
+ struct rr_frame *frame;
- tx_skb = dev_alloc_skb(16);
+ tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC);
if (!tx_skb)
return;
- frame = skb_put(tx_skb, 2);
+ frame = (struct rr_frame *)skb_put(tx_skb, 2);
- frame[0] = self->caddr;
- frame[0] |= (command) ? CMD_FRAME : 0;
+ frame->caddr = self->caddr;
+ frame->caddr |= (command) ? CMD_FRAME : 0;
- frame[1] = RR | PF_BIT | (self->vr << 5);
+ frame->control = RR | PF_BIT | (self->vr << 5);
irlap_queue_xmit(self, tx_skb);
}
@@ -599,16 +603,16 @@ void irlap_send_rr_frame(struct irlap_cb *self, int command)
void irlap_send_rd_frame(struct irlap_cb *self)
{
struct sk_buff *tx_skb;
- __u8 *frame;
+ struct rd_frame *frame;
- tx_skb = dev_alloc_skb(16);
+ tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC);
if (!tx_skb)
return;
- frame = skb_put(tx_skb, 2);
+ frame = (struct rd_frame *)skb_put(tx_skb, 2);
- frame[0] = self->caddr;
- frame[1] = RD_RSP | PF_BIT;
+ frame->caddr = self->caddr;
+ frame->caddr = RD_RSP | PF_BIT;
irlap_queue_xmit(self, tx_skb);
}
@@ -1215,7 +1219,7 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
struct test_frame *frame;
__u8 *info;
- tx_skb = dev_alloc_skb(cmd->len+sizeof(struct test_frame));
+ tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC);
if (!tx_skb)
return;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 57ea160f470b..5073261b9d0c 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -24,7 +24,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -79,10 +78,9 @@ int __init irlmp_init(void)
{
IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
/* Initialize the irlmp structure. */
- irlmp = kmalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
+ irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
if (irlmp == NULL)
return -ENOMEM;
- memset(irlmp, 0, sizeof(struct irlmp_cb));
irlmp->magic = LMP_MAGIC;
@@ -161,12 +159,11 @@ struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid)
return NULL;
/* Allocate new instance of a LSAP connection */
- self = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
if (self == NULL) {
IRDA_ERROR("%s: can't allocate memory\n", __FUNCTION__);
return NULL;
}
- memset(self, 0, sizeof(struct lsap_cb));
self->magic = LMP_LSAP_MAGIC;
self->slsap_sel = slsap_sel;
@@ -289,12 +286,11 @@ void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify)
/*
* Allocate new instance of a LSAP connection
*/
- lap = kmalloc(sizeof(struct lap_cb), GFP_KERNEL);
+ lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL);
if (lap == NULL) {
IRDA_ERROR("%s: unable to kmalloc\n", __FUNCTION__);
return;
}
- memset(lap, 0, sizeof(struct lap_cb));
lap->irlap = irlap;
lap->magic = LMP_LAP_MAGIC;
@@ -396,7 +392,7 @@ int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel,
/* Any userdata? */
if (tx_skb == NULL) {
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index 26649f6528e6..4c90dd1b4503 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -24,7 +24,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <net/irda/irda.h>
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
index 91cd268172fa..39761a1d18f5 100644
--- a/net/irda/irlmp_frame.c
+++ b/net/irda/irlmp_frame.c
@@ -24,7 +24,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/skbuff.h>
#include <linux/kernel.h>
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index 634901dd156f..2869b16e417d 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -31,7 +31,6 @@
* Jean II
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index e4fe1e80029c..80887528e77e 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -244,12 +244,10 @@
#include <linux/skbuff.h>
#include <linux/tty.h>
#include <linux/proc_fs.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/netdevice.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/ctype.h> /* isspace() */
#include <asm/uaccess.h>
#include <linux/init.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index e53bf9e0053e..a1e502ff9070 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -476,11 +476,10 @@ dev_irnet_open(struct inode * inode,
#endif /* SECURE_DEVIRNET */
/* Allocate a private structure for this IrNET instance */
- ap = kmalloc(sizeof(*ap), GFP_KERNEL);
+ ap = kzalloc(sizeof(*ap), GFP_KERNEL);
DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
/* initialize the irnet structure */
- memset(ap, 0, sizeof(*ap));
ap->file = file;
/* PPP channel setup */
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 1b1c4193359a..86805c3d8324 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -23,7 +23,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/ctype.h>
#include <linux/sysctl.h>
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 8aff254cb418..3c2e70b77df1 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -24,7 +24,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -86,10 +85,9 @@ static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
*/
int __init irttp_init(void)
{
- irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL);
+ irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL);
if (irttp == NULL)
return -ENOMEM;
- memset(irttp, 0, sizeof(struct irttp_cb));
irttp->magic = TTP_MAGIC;
@@ -307,7 +305,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
IRDA_DEBUG(2, "%s(), fragmenting ...\n", __FUNCTION__);
/* Make new segment */
- frag = dev_alloc_skb(self->max_seg_size+self->max_header_size);
+ frag = alloc_skb(self->max_seg_size+self->max_header_size,
+ GFP_ATOMIC);
if (!frag)
return;
@@ -390,12 +389,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
return NULL;
}
- self = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
if (self == NULL) {
IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__);
return NULL;
}
- memset(self, 0, sizeof(struct tsap_cb));
spin_lock_init(&self->lock);
/* Initialise todo timer */
@@ -806,12 +804,12 @@ static inline void irttp_give_credit(struct tsap_cb *self)
self->send_credit, self->avail_credit, self->remote_credit);
/* Give credit to peer */
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC);
if (!tx_skb)
return;
/* Reserve space for LMP, and LAP header */
- skb_reserve(tx_skb, self->max_header_size);
+ skb_reserve(tx_skb, LMP_MAX_HEADER);
/*
* Since we can transmit and receive frames concurrently,
@@ -1095,7 +1093,8 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
/* Any userdata supplied? */
if (userdata == NULL) {
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
+ GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
@@ -1343,7 +1342,8 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
/* Any userdata supplied? */
if (userdata == NULL) {
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
+ GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
@@ -1542,14 +1542,14 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
if (!userdata) {
struct sk_buff *tx_skb;
- tx_skb = dev_alloc_skb(64);
+ tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
if (!tx_skb)
return -ENOMEM;
/*
* Reserve space for MUX and LAP header
*/
- skb_reserve(tx_skb, TTP_MAX_HEADER);
+ skb_reserve(tx_skb, LMP_MAX_HEADER);
userdata = tx_skb;
}
@@ -1877,7 +1877,7 @@ static int irttp_seq_open(struct inode *inode, struct file *file)
int rc = -ENOMEM;
struct irttp_iter_state *s;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1887,7 +1887,6 @@ static int irttp_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/irda/qos.c b/net/irda/qos.c
index ddfb5c502a90..95a69c013ee8 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -30,7 +30,6 @@
*
********************************************************************/
-#include <linux/config.h>
#include <asm/byteorder.h>
#include <net/irda/irda.h>
diff --git a/net/irda/timer.c b/net/irda/timer.c
index 0e17f976add6..3871a2b911f9 100644
--- a/net/irda/timer.c
+++ b/net/irda/timer.c
@@ -25,7 +25,6 @@
********************************************************************/
#include <asm/system.h>
-#include <linux/config.h>
#include <linux/delay.h>
#include <net/irda/timer.h>
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d5e2121ea207..ff98e70b0931 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -14,7 +14,6 @@
* Derek Atkins <derek@ihtfp.com>
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -1732,7 +1731,8 @@ static u32 gen_reqid(void)
++reqid;
if (reqid == 0)
reqid = IPSEC_MANUAL_REQID_MAX+1;
- if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
+ if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid,
+ (void*)&reqid) != -EEXIST)
return reqid;
} while (reqid != start);
return 0;
@@ -1766,7 +1766,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
}
/* addresses present only in tunnel mode */
- if (t->mode) {
+ if (t->mode == XFRM_MODE_TUNNEL) {
switch (xp->family) {
case AF_INET:
sin = (void*)(rq+1);
@@ -1998,7 +1998,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
int req_size;
req_size = sizeof(struct sadb_x_ipsecrequest);
- if (t->mode)
+ if (t->mode == XFRM_MODE_TUNNEL)
req_size += 2*socklen;
else
size -= 2*socklen;
@@ -2014,7 +2014,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
if (t->optional)
rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
rq->sadb_x_ipsecrequest_reqid = t->reqid;
- if (t->mode) {
+ if (t->mode == XFRM_MODE_TUNNEL) {
switch (xp->family) {
case AF_INET:
sin = (void*)(rq+1);
@@ -2140,7 +2140,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
if (xp->selector.sport)
- xp->selector.sport_mask = ~0;
+ xp->selector.sport_mask = htons(0xffff);
sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1],
pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr);
@@ -2153,7 +2153,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
if (xp->selector.dport)
- xp->selector.dport_mask = ~0;
+ xp->selector.dport_mask = htons(0xffff);
sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
if (sec_ctx != NULL) {
@@ -2243,7 +2243,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
if (sel.sport)
- sel.sport_mask = ~0;
+ sel.sport_mask = htons(0xffff);
sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1],
pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
@@ -2251,7 +2251,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
if (sel.dport)
- sel.dport_mask = ~0;
+ sel.dport_mask = htons(0xffff);
sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
memset(&tmp, 0, sizeof(struct xfrm_policy));
@@ -2269,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
return err;
}
- xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
+ xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
+ &sel, tmp.security, 1);
security_xfrm_policy_free(&tmp);
if (xp == NULL)
return -ENOENT;
@@ -2331,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
if (dir >= XFRM_POLICY_MAX)
return -EINVAL;
- xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id,
+ xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id,
hdr->sadb_msg_type == SADB_X_SPDDELETE2);
if (xp == NULL)
return -ENOENT;
@@ -2379,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
{
struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
- return xfrm_policy_walk(dump_sp, &data);
+ return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data);
}
static int key_notify_policy_flush(struct km_event *c)
@@ -2406,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
{
struct km_event c;
- xfrm_policy_flush();
+ xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
+ c.data.type = XFRM_POLICY_TYPE_MAIN;
c.event = XFRM_MSG_FLUSHPOLICY;
c.pid = hdr->sadb_msg_pid;
c.seq = hdr->sadb_msg_seq;
@@ -2668,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
+ if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
+ return 0;
+
switch (c->event) {
case XFRM_MSG_POLEXPIRE:
return key_notify_policy_expire(xp, c);
@@ -2676,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e
case XFRM_MSG_UPDPOLICY:
return key_notify_policy(xp, dir, c);
case XFRM_MSG_FLUSHPOLICY:
+ if (c->data.type != XFRM_POLICY_TYPE_MAIN)
+ break;
return key_notify_policy_flush(c);
default:
printk("pfkey: Unknown policy event %d\n", c->event);
@@ -2709,6 +2716,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
#endif
int sockaddr_size;
int size;
+ struct sadb_x_sec_ctx *sec_ctx;
+ struct xfrm_sec_ctx *xfrm_ctx;
+ int ctx_size = 0;
sockaddr_size = pfkey_sockaddr_size(x->props.family);
if (!sockaddr_size)
@@ -2724,6 +2734,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
else if (x->id.proto == IPPROTO_ESP)
size += count_esp_combs(t);
+ if ((xfrm_ctx = x->security)) {
+ ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+ size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
+ }
+
skb = alloc_skb(size + 16, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -2819,17 +2834,31 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
else if (x->id.proto == IPPROTO_ESP)
dump_esp_combs(skb, t);
+ /* security context */
+ if (xfrm_ctx) {
+ sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+ sizeof(struct sadb_x_sec_ctx) + ctx_size);
+ sec_ctx->sadb_x_sec_len =
+ (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+ sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+ sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+ sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+ sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+ memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+ xfrm_ctx->ctx_len);
+ }
+
return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
}
-static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
u8 *data, int len, int *dir)
{
struct xfrm_policy *xp;
struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
struct sadb_x_sec_ctx *sec_ctx;
- switch (family) {
+ switch (sk->sk_family) {
case AF_INET:
if (opt != IP_IPSEC_POLICY) {
*dir = -EOPNOTSUPP;
@@ -2870,7 +2899,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
xp->lft.hard_byte_limit = XFRM_INF;
xp->lft.soft_packet_limit = XFRM_INF;
xp->lft.hard_packet_limit = XFRM_INF;
- xp->family = family;
+ xp->family = sk->sk_family;
xp->xfrm_nr = 0;
if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
@@ -2886,8 +2915,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
p += pol->sadb_x_policy_len*8;
sec_ctx = (struct sadb_x_sec_ctx *)p;
if (len < pol->sadb_x_policy_len*8 +
- sec_ctx->sadb_x_sec_len)
+ sec_ctx->sadb_x_sec_len) {
+ *dir = -EINVAL;
goto out;
+ }
if ((*dir = verify_sec_ctx_len(p)))
goto out;
uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
@@ -2897,6 +2928,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
if (*dir)
goto out;
}
+ else {
+ *dir = security_xfrm_sock_policy_alloc(xp, sk);
+ if (*dir)
+ goto out;
+ }
*dir = pol->sadb_x_policy_dir-1;
return xp;
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index aea6616cea3d..7e6bc41eeb21 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -115,14 +115,12 @@ static struct lapb_cb *lapb_devtostruct(struct net_device *dev)
*/
static struct lapb_cb *lapb_create_cb(void)
{
- struct lapb_cb *lapb = kmalloc(sizeof(*lapb), GFP_ATOMIC);
+ struct lapb_cb *lapb = kzalloc(sizeof(*lapb), GFP_ATOMIC);
if (!lapb)
goto out;
- memset(lapb, 0x00, sizeof(*lapb));
-
skb_queue_head_init(&lapb->write_queue);
skb_queue_head_init(&lapb->ack_queue);
@@ -240,11 +238,13 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
goto out_put;
if (lapb->state == LAPB_STATE_0) {
- if (((parms->mode & LAPB_EXTENDED) &&
- (parms->window < 1 || parms->window > 127)) ||
- (parms->window < 1 || parms->window > 7))
- goto out_put;
-
+ if (parms->mode & LAPB_EXTENDED) {
+ if (parms->window < 1 || parms->window > 127)
+ goto out_put;
+ } else {
+ if (parms->window < 1 || parms->window > 7)
+ goto out_put;
+ }
lapb->mode = parms->mode;
lapb->window = parms->window;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 75c9b1480801..2652ead96c64 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -20,7 +20,6 @@
*
* See the GNU General Public License for more details.
*/
-#include <linux/config.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -785,24 +784,20 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
copied += used;
len -= used;
- if (used + offset < skb->len)
- continue;
-
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, 0);
*seq = 0;
}
+
+ /* For non stream protcols we get one packet per recvmsg call */
+ if (sk->sk_type != SOCK_STREAM)
+ goto copy_uaddr;
+
+ /* Partial read */
+ if (used + offset < skb->len)
+ continue;
} while (len > 0);
- /*
- * According to UNIX98, msg_name/msg_namelen are ignored
- * on connected socket. -ANK
- * But... af_llc still doesn't have separate sets of methods for
- * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will
- * eventually fix this tho :-) -acme
- */
- if (sk->sk_type == SOCK_DGRAM)
- goto copy_uaddr;
out:
release_sock(sk);
return copied;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index bd242a49514a..d12413cff5bd 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -33,10 +33,9 @@ unsigned char llc_station_mac_sa[ETH_ALEN];
*/
static struct llc_sap *llc_sap_alloc(void)
{
- struct llc_sap *sap = kmalloc(sizeof(*sap), GFP_ATOMIC);
+ struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC);
if (sap) {
- memset(sap, 0, sizeof(*sap));
sap->state = LLC_SAP_STATE_ACTIVE;
memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN);
rwlock_init(&sap->sk_list.lock);
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 5ae47be7dde0..a89917130a7b 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -11,7 +11,6 @@
*
* See the GNU General Public License for more details.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index bd531cb235a7..19308fece3ad 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -12,7 +12,6 @@
* See the GNU General Public License for more details.
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 20c4eb5c1ac6..61cb8cf7d153 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -51,10 +51,10 @@ void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim)
{
struct sockaddr_llc *addr;
- if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */
- return;
/* save primitive for use by the user. */
addr = llc_ui_skb_cb(skb);
+
+ memset(addr, 0, sizeof(*addr));
addr->sllc_family = sk->sk_family;
addr->sllc_arphrd = skb->dev->type;
addr->sllc_test = prim == LLC_TEST_PRIM;
@@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
if (llc->laddr.lsap != laddr->lsap)
continue;
+ if (llc->dev != skb->dev)
+ continue;
+
skb1 = skb_clone(skb, GFP_ATOMIC);
if (!skb1)
break;
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index f37dbf8ef126..8275bd33bd9d 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -11,7 +11,6 @@
*
* See the GNU General Public License for more details.
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <net/llc.h>
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index d1eaddb13633..45d7dd92a088 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -4,7 +4,6 @@
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/sysctl.h>
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b1622b7de1cf..0a28d2c5c44f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK
<file:Documentation/modules.txt>. The module will be called
ipt_CONNMARK.o. If unsure, say `N'.
+config NETFILTER_XT_TARGET_DSCP
+ tristate '"DSCP" target support'
+ depends on NETFILTER_XTABLES
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ help
+ This option adds a `DSCP' target, which allows you to manipulate
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_MARK
tristate '"MARK" target support'
depends on NETFILTER_XTABLES
@@ -263,6 +275,17 @@ config NETFILTER_XT_MATCH_DCCP
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
+config NETFILTER_XT_MATCH_DSCP
+ tristate '"DSCP" match support'
+ depends on NETFILTER_XTABLES
+ help
+ This option adds a `DSCP' match, which allows you to match against
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_ESP
tristate '"ESP" match support'
depends on NETFILTER_XTABLES
@@ -386,8 +409,8 @@ config NETFILTER_XT_MATCH_REALM
<file:Documentation/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_SCTP
- tristate '"sctp" protocol match support'
- depends on NETFILTER_XTABLES
+ tristate '"sctp" protocol match support (EXPERIMENTAL)'
+ depends on NETFILTER_XTABLES && EXPERIMENTAL
help
With this option enabled, you will be able to use the
`sctp' match in order to match on SCTP source/destination ports
@@ -411,7 +434,10 @@ config NETFILTER_XT_MATCH_STATISTIC
tristate '"statistic" match support'
depends on NETFILTER_XTABLES
help
- statistic module
+ This option adds a `statistic' match, which allows you to match
+ on packets periodically or randomly with a given percentage.
+
+ To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_STRING
tristate '"string" match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6fa4b7580458..a74be492fd0a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -37,6 +38,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8455a32ea5c4..d80b935b3a92 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -10,7 +10,6 @@
* 15-Mar-2000: Added NF_REPEAT --RR.
* 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
@@ -183,7 +182,7 @@ next_hook:
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
+ if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
goto next_hook;
}
@@ -223,6 +222,28 @@ copy_skb:
}
EXPORT_SYMBOL(skb_make_writable);
+u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
+{
+ u_int32_t diff[] = { oldval, newval };
+
+ return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
+}
+EXPORT_SYMBOL(nf_csum_update);
+
+u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+ u_int32_t oldval, u_int32_t newval,
+ u_int16_t csum, int pseudohdr)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ csum = nf_csum_update(oldval, newval, csum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = nf_csum_update(oldval, newval, skb->csum);
+ } else if (pseudohdr)
+ csum = ~nf_csum_update(oldval, newval, ~csum);
+
+ return csum;
+}
+EXPORT_SYMBOL(nf_proto_csum_update);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cd299f4b7db1..093b3ddc513c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -29,7 +29,6 @@
* Derived from net/ipv4/netfilter/ip_conntrack_core.c
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
@@ -58,7 +57,6 @@
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#define NF_CONNTRACK_VERSION "0.5.0"
@@ -75,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0);
void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
LIST_HEAD(nf_conntrack_expect_list);
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
+struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
static LIST_HEAD(helpers);
-unsigned int nf_conntrack_htable_size = 0;
-int nf_conntrack_max;
-struct list_head *nf_conntrack_hash;
-static kmem_cache_t *nf_conntrack_expect_cachep;
+unsigned int nf_conntrack_htable_size __read_mostly = 0;
+int nf_conntrack_max __read_mostly;
+struct list_head *nf_conntrack_hash __read_mostly;
+static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
struct nf_conn nf_conntrack_untracked;
-unsigned int nf_ct_log_invalid;
+unsigned int nf_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc;
+static int nf_conntrack_vmalloc __read_mostly;
static unsigned int nf_conntrack_next_id;
static unsigned int nf_conntrack_expect_next_id;
@@ -540,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
static void
clean_from_lists(struct nf_conn *ct)
{
- unsigned int ho, hr;
-
DEBUGP("clean_from_lists(%p)\n", ct);
ASSERT_WRITE_LOCK(&nf_conntrack_lock);
-
- ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
- LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
/* Destroy all pending expectations */
nf_ct_remove_expectations(ct);
@@ -618,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
nf_ct_put(ct);
}
-static inline int
-conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack)
-{
- ASSERT_READ_LOCK(&nf_conntrack_lock);
- return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
- && nf_ct_tuple_equal(tuple, &i->tuple);
-}
-
struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
@@ -637,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
ASSERT_READ_LOCK(&nf_conntrack_lock);
list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
- if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
+ nf_ct_tuple_equal(tuple, &h->tuple)) {
NF_CT_STAT_INC(found);
return h;
}
@@ -668,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int repl_hash)
{
ct->id = ++nf_conntrack_next_id;
- list_prepend(&nf_conntrack_hash[hash],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_prepend(&nf_conntrack_hash[repl_hash],
- &ct->tuplehash[IP_CT_DIR_REPLY].list);
+ list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+ &nf_conntrack_hash[hash]);
+ list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+ &nf_conntrack_hash[repl_hash]);
}
void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -691,7 +675,9 @@ int
__nf_conntrack_confirm(struct sk_buff **pskb)
{
unsigned int hash, repl_hash;
+ struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
+ struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
ct = nf_ct_get(*pskb, &ctinfo);
@@ -721,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- if (!LIST_FIND(&nf_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct nf_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
- && !LIST_FIND(&nf_conntrack_hash[repl_hash],
- conntrack_tuple_cmp,
- struct nf_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
- struct nf_conn_help *help;
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_for_each_entry(h, &nf_conntrack_hash[hash], list)
+ if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &h->tuple))
+ goto out;
+ list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
+ if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &h->tuple))
+ goto out;
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- NF_CT_STAT_INC(insert);
- write_unlock_bh(&nf_conntrack_lock);
- help = nfct_help(ct);
- if (help && help->helper)
- nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+ /* Remove from unconfirmed list */
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+ __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ NF_CT_STAT_INC(insert);
+ write_unlock_bh(&nf_conntrack_lock);
+ help = nfct_help(ct);
+ if (help && help->helper)
+ nf_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
- nf_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
- return NF_ACCEPT;
- }
+ nf_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, *pskb);
+ return NF_ACCEPT;
+out:
NF_CT_STAT_INC(insert_failed);
write_unlock_bh(&nf_conntrack_lock);
return NF_DROP;
@@ -778,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
-{
- return !(test_bit(IPS_ASSURED_BIT,
- &nf_ct_tuplehash_to_ctrack(i)->status));
-}
-
static int early_drop(struct list_head *chain)
{
/* Traverse backwards: gives us oldest, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct = NULL;
+ struct nf_conn *ct = NULL, *tmp;
int dropped = 0;
read_lock_bh(&nf_conntrack_lock);
- h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
- if (h) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- atomic_inc(&ct->ct_general.use);
+ list_for_each_entry_reverse(h, chain, list) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+ ct = tmp;
+ atomic_inc(&ct->ct_general.use);
+ break;
+ }
}
read_unlock_bh(&nf_conntrack_lock);
@@ -811,18 +794,16 @@ static int early_drop(struct list_head *chain)
return dropped;
}
-static inline int helper_cmp(const struct nf_conntrack_helper *i,
- const struct nf_conntrack_tuple *rtuple)
-{
- return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
- return LIST_FIND(&helpers, helper_cmp,
- struct nf_conntrack_helper *,
- tuple);
+ struct nf_conntrack_helper *h;
+
+ list_for_each_entry(h, &helpers, list) {
+ if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+ return h;
+ }
+ return NULL;
}
struct nf_conntrack_helper *
@@ -867,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
nf_conntrack_hash_rnd_initted = 1;
}
+ /* We don't want any race condition at early drop stage */
+ atomic_inc(&nf_conntrack_count);
+
if (nf_conntrack_max
- && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
+ && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
unsigned int hash = hash_conntrack(orig);
/* Try dropping from this hash chain. */
if (!early_drop(&nf_conntrack_hash[hash])) {
+ atomic_dec(&nf_conntrack_count);
if (net_ratelimit())
printk(KERN_WARNING
"nf_conntrack: table full, dropping"
@@ -922,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
init_timer(&conntrack->timeout);
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
+ read_unlock_bh(&nf_ct_cache_lock);
- atomic_inc(&nf_conntrack_count);
+ return conntrack;
out:
read_unlock_bh(&nf_ct_cache_lock);
+ atomic_dec(&nf_conntrack_count);
return conntrack;
}
@@ -1324,7 +1311,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
return ret;
}
write_lock_bh(&nf_conntrack_lock);
- list_prepend(&helpers, me);
+ list_add(&me->list, &helpers);
write_unlock_bh(&nf_conntrack_lock);
return 0;
@@ -1343,8 +1330,8 @@ __nf_conntrack_helper_find_byname(const char *name)
return NULL;
}
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
- const struct nf_conntrack_helper *me)
+static inline void unhelp(struct nf_conntrack_tuple_hash *i,
+ const struct nf_conntrack_helper *me)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
@@ -1353,17 +1340,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
nf_conntrack_event(IPCT_HELPER, ct);
help->helper = NULL;
}
- return 0;
}
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
unsigned int i;
+ struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
write_lock_bh(&nf_conntrack_lock);
- LIST_DELETE(&helpers, me);
+ list_del(&me->list);
/* Get rid of expectations */
list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
@@ -1375,10 +1362,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
}
/* Get rid of expecteds, set helpers to NULL. */
- LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
- for (i = 0; i < nf_conntrack_htable_size; i++)
- LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
- struct nf_conntrack_tuple_hash *, me);
+ list_for_each_entry(h, &unconfirmed, list)
+ unhelp(h, me);
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ list_for_each_entry(h, &nf_conntrack_hash[i], list)
+ unhelp(h, me);
+ }
write_unlock_bh(&nf_conntrack_lock);
/* Someone could be still looking at the helper in a bh. */
@@ -1511,37 +1500,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i,
}
/* Bring out ya dead! */
-static struct nf_conntrack_tuple_hash *
+static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
void *data, unsigned int *bucket)
{
- struct nf_conntrack_tuple_hash *h = NULL;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
write_lock_bh(&nf_conntrack_lock);
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
- h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
- struct nf_conntrack_tuple_hash *, iter, data);
- if (h)
- break;
+ list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
}
- if (!h)
- h = LIST_FIND_W(&unconfirmed, do_iter,
- struct nf_conntrack_tuple_hash *, iter, data);
- if (h)
- atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+ list_for_each_entry(h, &unconfirmed, list) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ return NULL;
+found:
+ atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
write_unlock_bh(&nf_conntrack_lock);
-
- return h;
+ return ct;
}
void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
- struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
unsigned int bucket = 0;
- while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
death_by_timeout((unsigned long)ct);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 11d3be243536..0c17a5bd112b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -15,13 +15,13 @@
* Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ctype.h>
+#include <linux/inet.h>
#include <net/checksum.h>
#include <net/tcp.h>
@@ -112,101 +112,14 @@ static struct ftp_search {
},
};
-/* This code is based on inet_pton() in glibc-2.2.4 */
static int
get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
{
- static const char xdigits[] = "0123456789abcdef";
- u_int8_t tmp[16], *tp, *endp, *colonp;
- int ch, saw_xdigit;
- u_int32_t val;
- size_t clen = 0;
-
- tp = memset(tmp, '\0', sizeof(tmp));
- endp = tp + sizeof(tmp);
- colonp = NULL;
-
- /* Leading :: requires some special handling. */
- if (*src == ':'){
- if (*++src != ':') {
- DEBUGP("invalid \":\" at the head of addr\n");
- return 0;
- }
- clen++;
- }
-
- saw_xdigit = 0;
- val = 0;
- while ((clen < dlen) && (*src != term)) {
- const char *pch;
-
- ch = tolower(*src++);
- clen++;
-
- pch = strchr(xdigits, ch);
- if (pch != NULL) {
- val <<= 4;
- val |= (pch - xdigits);
- if (val > 0xffff)
- return 0;
-
- saw_xdigit = 1;
- continue;
- }
- if (ch != ':') {
- DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
- return 0;
- }
-
- if (!saw_xdigit) {
- if (colonp) {
- DEBUGP("invalid location of \"::\".\n");
- return 0;
- }
- colonp = tp;
- continue;
- } else if (*src == term) {
- DEBUGP("trancated IPv6 addr\n");
- return 0;
- }
-
- if (tp + 2 > endp)
- return 0;
- *tp++ = (u_int8_t) (val >> 8) & 0xff;
- *tp++ = (u_int8_t) val & 0xff;
-
- saw_xdigit = 0;
- val = 0;
- continue;
- }
- if (saw_xdigit) {
- if (tp + 2 > endp)
- return 0;
- *tp++ = (u_int8_t) (val >> 8) & 0xff;
- *tp++ = (u_int8_t) val & 0xff;
- }
- if (colonp != NULL) {
- /*
- * Since some memmove()'s erroneously fail to handle
- * overlapping regions, we'll do the shift by hand.
- */
- const int n = tp - colonp;
- int i;
-
- if (tp == endp)
- return 0;
-
- for (i = 1; i <= n; i++) {
- endp[- i] = colonp[n - i];
- colonp[n - i] = 0;
- }
- tp = endp;
- }
- if (tp != endp || (*src != term))
- return 0;
-
- memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
- return clen;
+ const char *end;
+ int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end);
+ if (ret > 0)
+ return (int)(end - src);
+ return 0;
}
static int try_number(const char *data, size_t dlen, u_int32_t array[],
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 3fc58e454d4e..21e0bc91cf23 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -15,7 +15,6 @@
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b8c7c567c9df..1721f7c78c77 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
+#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/netfilter.h>
@@ -338,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
/* dump everything */
events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS |
- IPCT_PROTOINFO |
- IPCT_HELPER |
- IPCT_HELPINFO |
- IPCT_NATINFO)) {
+ } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
@@ -394,6 +391,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
goto nfattr_failure;
+ if (events & IPCT_MARK
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nfattr_failure;
+
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
return NOTIFY_DONE;
@@ -428,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0], *id);
read_lock_bh(&nf_conntrack_lock);
+ last = (struct nf_conn *)cb->args[1];
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
- last = (struct nf_conn *)cb->args[1];
list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
h = (struct nf_conntrack_tuple_hash *) i;
if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -441,13 +442,10 @@ restart:
* then dump everything. */
if (l3proto && L3PROTO(ct) != l3proto)
continue;
- if (last != NULL) {
- if (ct == last) {
- nf_ct_put(last);
- cb->args[1] = 0;
- last = NULL;
- } else
+ if (cb->args[1]) {
+ if (ct != last)
continue;
+ cb->args[1] = 0;
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -457,65 +455,26 @@ restart:
cb->args[1] = (unsigned long)ct;
goto out;
}
+#ifdef CONFIG_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+ IPCTNL_MSG_CT_GET_CTRZERO)
+ memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
}
- if (last != NULL) {
- nf_ct_put(last);
+ if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
read_unlock_bh(&nf_conntrack_lock);
+ if (last)
+ nf_ct_put(last);
DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
return skb->len;
}
-#ifdef CONFIG_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct nf_conn *ct = NULL;
- struct nf_conntrack_tuple_hash *h;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[1];
- struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
-
- DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
- cb->args[0], *id);
-
- write_lock_bh(&nf_conntrack_lock);
- for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
- list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
- h = (struct nf_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && L3PROTO(ct) != l3proto)
- continue;
- if (ct->id <= *id)
- continue;
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0)
- goto out;
- *id = ct->id;
-
- memset(&ct->counters, 0, sizeof(ct->counters));
- }
- }
-out:
- write_unlock_bh(&nf_conntrack_lock);
-
- DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
- return skb->len;
-}
-#endif
-
static inline int
ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
{
@@ -790,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
u32 rlen;
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_NF_CT_ACCT
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table_w,
- ctnetlink_done)) != 0)
- return -EINVAL;
-#else
+#ifndef CONFIG_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
return -ENOTSUPP;
#endif
- } else {
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
+ if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_dump_table,
+ ctnetlink_done)) != 0)
return -EINVAL;
- }
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
@@ -1276,6 +1227,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
} else
return NOTIFY_DONE;
+ if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+ return NOTIFY_DONE;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 46bc27e2756d..26408bb0955b 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_protocol.h>
-unsigned int nf_ct_generic_timeout = 600*HZ;
+unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 0c6da496cfa9..af568777372b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -28,6 +28,8 @@
#include <linux/sctp.h>
#include <linux/string.h>
#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_protocol.h>
@@ -62,13 +64,13 @@ static const char *sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int nf_ct_sctp_timeout_closed = 10 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned int nf_ct_sctp_timeout_established = 5 DAYS;
-static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
static unsigned int * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
@@ -259,7 +261,7 @@ static int do_basic_checks(struct nf_conn *conntrack,
}
DEBUGP("Basic checks passed\n");
- return 0;
+ return count == 0;
}
static int new_state(enum ip_conntrack_dir dir,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 12fb7c0a1509..238bbb5b72ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -24,7 +24,6 @@
* version 2.2
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
@@ -58,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
-int nf_ct_tcp_be_liberal = 0;
+int nf_ct_tcp_be_liberal __read_mostly = 0;
/* When connection is picked up from the middle, how many packets are required
to pass in each direction when we assume we are in sync - if any side uses
window scaling, we lost the game.
If it is set to zero, we disable picking up already established
connections. */
-int nf_ct_tcp_loose = 3;
+int nf_ct_tcp_loose __read_mostly = 3;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
-int nf_ct_tcp_max_retrans = 3;
+int nf_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
@@ -93,19 +92,19 @@ static const char *tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned int nf_ct_tcp_timeout_established = 5 DAYS;
-unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close = 10 SECS;
+unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
+unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
+unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
+unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
+unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
-unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS;
+unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
static unsigned int * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
@@ -689,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
- && state->last_end == end)
+ && state->last_end == end
+ && state->last_win == win)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
+ state->last_win = win;
state->retrans = 0;
}
}
@@ -824,8 +825,7 @@ static int tcp_error(struct sk_buff *skb,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because the checksum is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
if (nf_conntrack_checksum &&
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index ae07ebe3ab37..d28981cf9af5 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_protocol.h>
-unsigned int nf_ct_udp_timeout = 30*HZ;
-unsigned int nf_ct_udp_timeout_stream = 180*HZ;
+unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
@@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
if (nf_conntrack_checksum &&
((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index e34c574f0351..5954f6773810 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -17,7 +17,6 @@
* Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
@@ -38,7 +37,6 @@
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -429,6 +427,8 @@ static struct file_operations ct_cpu_seq_fops = {
/* Sysctl support */
+int nf_conntrack_checksum __read_mostly = 1;
+
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_core.c */
@@ -460,8 +460,6 @@ extern unsigned int nf_ct_generic_timeout;
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-int nf_conntrack_checksum = 1;
-
static struct ctl_table_header *nf_ct_sysctl_header;
static ctl_table nf_ct_sysctl_table[] = {
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 6bdee2910617..a981971ce1d5 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -1,7 +1,6 @@
#ifndef _NF_INTERNALS_H
#define _NF_INTERNALS_H
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -24,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head,
int hook_thresh);
/* nf_queue.c */
-extern int nf_queue(struct sk_buff **skb,
+extern int nf_queue(struct sk_buff *skb,
struct list_head *elem,
int pf, unsigned int hook,
struct net_device *indev,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3e76bd0824a2..8901b3a07f7e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -1,4 +1,3 @@
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ee8f70889f47..4d8936ed581d 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,4 +1,3 @@
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -75,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
* Any packet that leaves via this function must come back
* through nf_reinject().
*/
-int nf_queue(struct sk_buff **skb,
- struct list_head *elem,
- int pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ int pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ unsigned int queuenum)
{
int status;
struct nf_info *info;
@@ -95,14 +94,14 @@ int nf_queue(struct sk_buff **skb,
read_lock(&queue_handler_lock);
if (!queue_handler[pf]) {
read_unlock(&queue_handler_lock);
- kfree_skb(*skb);
+ kfree_skb(skb);
return 1;
}
afinfo = nf_get_afinfo(pf);
if (!afinfo) {
read_unlock(&queue_handler_lock);
- kfree_skb(*skb);
+ kfree_skb(skb);
return 1;
}
@@ -110,9 +109,9 @@ int nf_queue(struct sk_buff **skb,
if (!info) {
if (net_ratelimit())
printk(KERN_ERR "OOM queueing packet %p\n",
- *skb);
+ skb);
read_unlock(&queue_handler_lock);
- kfree_skb(*skb);
+ kfree_skb(skb);
return 1;
}
@@ -131,15 +130,15 @@ int nf_queue(struct sk_buff **skb,
if (outdev) dev_hold(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
- if ((*skb)->nf_bridge) {
- physindev = (*skb)->nf_bridge->physindev;
+ if (skb->nf_bridge) {
+ physindev = skb->nf_bridge->physindev;
if (physindev) dev_hold(physindev);
- physoutdev = (*skb)->nf_bridge->physoutdev;
+ physoutdev = skb->nf_bridge->physoutdev;
if (physoutdev) dev_hold(physoutdev);
}
#endif
- afinfo->saveroute(*skb, info);
- status = queue_handler[pf]->outfn(*skb, info, queuenum,
+ afinfo->saveroute(skb, info);
+ status = queue_handler[pf]->outfn(skb, info, queuenum,
queue_handler[pf]->data);
read_unlock(&queue_handler_lock);
@@ -154,7 +153,7 @@ int nf_queue(struct sk_buff **skb,
#endif
module_put(info->elem->owner);
kfree(info);
- kfree_skb(*skb);
+ kfree_skb(skb);
return 1;
}
@@ -162,6 +161,46 @@ int nf_queue(struct sk_buff **skb,
return 1;
}
+int nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ int pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ unsigned int queuenum)
+{
+ struct sk_buff *segs;
+
+ if (!skb_is_gso(skb))
+ return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+ queuenum);
+
+ switch (pf) {
+ case AF_INET:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
+
+ segs = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (unlikely(IS_ERR(segs)))
+ return 1;
+
+ do {
+ struct sk_buff *nskb = segs->next;
+
+ segs->next = NULL;
+ if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+ queuenum))
+ kfree_skb(segs);
+ segs = nskb;
+ } while (segs);
+ return 1;
+}
+
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
unsigned int verdict)
{
@@ -220,21 +259,20 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
+ case NF_STOP:
info->okfn(skb);
+ case NF_STOLEN:
break;
-
case NF_QUEUE:
- if (!nf_queue(&skb, elem, info->pf, info->hook,
- info->indev, info->outdev, info->okfn,
- verdict >> NF_VERDICT_BITS))
+ if (!__nf_queue(skb, elem, info->pf, info->hook,
+ info->indev, info->outdev, info->okfn,
+ verdict >> NF_VERDICT_BITS))
goto next_hook;
break;
+ default:
+ kfree_skb(skb);
}
rcu_read_unlock();
-
- if (verdict == NF_DROP)
- kfree_skb(skb);
-
kfree(info);
return;
}
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 0a63d7dac7be..c2e44e90e437 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -1,4 +1,3 @@
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b88e82a1a987..52fdfa2686c9 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -14,7 +14,6 @@
* of the GNU General Public License, incorporated herein by reference.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -229,7 +228,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
NFNL_SUBSYS_ID(nlh->nlmsg_type),
NFNL_MSG_TYPE(nlh->nlmsg_type));
- if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
+ if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
DEBUGP("missing CAP_NET_ADMIN\n");
*errp = -EPERM;
return -1;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 61cdda4e5d3b..b59d3b2bde21 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst)
if (timer_pending(&inst->timer))
del_timer(&inst->timer);
+ if (!inst->skb)
+ return 0;
+
if (inst->qlen > 1)
inst->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 86a4ac33de34..8eb2473d83e1 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
break;
case NFQNL_COPY_PACKET:
- if (entskb->ip_summed == CHECKSUM_HW &&
- (*errp = skb_checksum_help(entskb,
- outdev == NULL))) {
+ if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+ entskb->ip_summed == CHECKSUM_COMPLETE) &&
+ (*errp = skb_checksum_help(entskb))) {
spin_unlock_bh(&queue->lock);
return NULL;
}
@@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
queue->queue_dropped++;
status = -ENOSPC;
if (net_ratelimit())
- printk(KERN_WARNING "ip_queue: full at %d entries, "
+ printk(KERN_WARNING "nf_queue: full at %d entries, "
"dropping packets(s). Dropped: %d\n",
queue->queue_total, queue->queue_dropped);
goto err_out_free_nskb;
@@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
diff,
GFP_ATOMIC);
if (newskb == NULL) {
- printk(KERN_WARNING "ip_queue: OOM "
+ printk(KERN_WARNING "nf_queue: OOM "
"in mangle, dropping packet\n");
return -ENOMEM;
}
@@ -680,11 +680,19 @@ dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
if (entinf->indev)
if (entinf->indev->ifindex == ifindex)
return 1;
-
if (entinf->outdev)
if (entinf->outdev->ifindex == ifindex)
return 1;
-
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (entry->skb->nf_bridge) {
+ if (entry->skb->nf_bridge->physindev &&
+ entry->skb->nf_bridge->physindev->ifindex == ifindex)
+ return 1;
+ if (entry->skb->nf_bridge->physoutdev &&
+ entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ return 1;
+ }
+#endif
return 0;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 99293c63ff73..58522fc65d33 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -13,7 +13,6 @@
*
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/net.h>
@@ -82,12 +81,42 @@ xt_unregister_target(struct xt_target *target)
int af = target->family;
mutex_lock(&xt[af].mutex);
- LIST_DELETE(&xt[af].target, target);
+ list_del(&target->list);
mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_target);
int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = xt_register_target(&target[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ xt_unregister_targets(target, i);
+ return err;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ xt_unregister_target(&target[i]);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
+int
xt_register_match(struct xt_match *match)
{
int ret, af = match->family;
@@ -109,11 +138,41 @@ xt_unregister_match(struct xt_match *match)
int af = match->family;
mutex_lock(&xt[af].mutex);
- LIST_DELETE(&xt[af].match, match);
+ list_del(&match->list);
mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_match);
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = xt_register_match(&match[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ xt_unregister_matches(match, i);
+ return err;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ xt_unregister_match(&match[i]);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
/*
* These are weird, but module loading must not be done with mutex
@@ -274,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
EXPORT_SYMBOL_GPL(xt_check_match);
#ifdef CONFIG_COMPAT
-int xt_compat_match(void *match, void **dstptr, int *size, int convert)
+int xt_compat_match_offset(struct xt_match *match)
{
- struct xt_match *m;
- struct compat_xt_entry_match *pcompat_m;
- struct xt_entry_match *pm;
- u_int16_t msize;
- int off, ret;
+ u_int16_t csize = match->compatsize ? : match->matchsize;
+ return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
- ret = 0;
- m = ((struct xt_entry_match *)match)->u.kernel.match;
- off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize);
- switch (convert) {
- case COMPAT_TO_USER:
- pm = (struct xt_entry_match *)match;
- msize = pm->u.user.match_size;
- if (copy_to_user(*dstptr, pm, msize)) {
- ret = -EFAULT;
- break;
- }
- msize -= off;
- if (put_user(msize, (u_int16_t *)*dstptr))
- ret = -EFAULT;
- *size -= off;
- *dstptr += msize;
- break;
- case COMPAT_FROM_USER:
- pcompat_m = (struct compat_xt_entry_match *)match;
- pm = (struct xt_entry_match *)*dstptr;
- msize = pcompat_m->u.user.match_size;
- memcpy(pm, pcompat_m, msize);
- msize += off;
- pm->u.user.match_size = msize;
- *size += off;
- *dstptr += msize;
- break;
- case COMPAT_CALC_SIZE:
- *size += off;
- break;
- default:
- ret = -ENOPROTOOPT;
- break;
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ int *size)
+{
+ struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+ int pad, off = xt_compat_match_offset(match);
+ u_int16_t msize = cm->u.user.match_size;
+
+ m = *dstptr;
+ memcpy(m, cm, sizeof(*cm));
+ if (match->compat_from_user)
+ match->compat_from_user(m->data, cm->data);
+ else
+ memcpy(m->data, cm->data, msize - sizeof(*cm));
+ pad = XT_ALIGN(match->matchsize) - match->matchsize;
+ if (pad > 0)
+ memset(m->data + match->matchsize, 0, pad);
+
+ msize += off;
+ m->u.user.match_size = msize;
+
+ *size += off;
+ *dstptr += msize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
+ int *size)
+{
+ struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match __user *cm = *dstptr;
+ int off = xt_compat_match_offset(match);
+ u_int16_t msize = m->u.user.match_size - off;
+
+ if (copy_to_user(cm, m, sizeof(*cm)) ||
+ put_user(msize, &cm->u.user.match_size))
+ return -EFAULT;
+
+ if (match->compat_to_user) {
+ if (match->compat_to_user((void __user *)cm->data, m->data))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+ return -EFAULT;
}
- return ret;
+
+ *size -= off;
+ *dstptr += msize;
+ return 0;
}
-EXPORT_SYMBOL_GPL(xt_compat_match);
-#endif
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+#endif /* CONFIG_COMPAT */
int xt_check_target(const struct xt_target *target, unsigned short family,
unsigned int size, const char *table, unsigned int hook_mask,
@@ -351,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
EXPORT_SYMBOL_GPL(xt_check_target);
#ifdef CONFIG_COMPAT
-int xt_compat_target(void *target, void **dstptr, int *size, int convert)
+int xt_compat_target_offset(struct xt_target *target)
{
- struct xt_target *t;
- struct compat_xt_entry_target *pcompat;
- struct xt_entry_target *pt;
- u_int16_t tsize;
- int off, ret;
+ u_int16_t csize = target->compatsize ? : target->targetsize;
+ return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
- ret = 0;
- t = ((struct xt_entry_target *)target)->u.kernel.target;
- off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize);
- switch (convert) {
- case COMPAT_TO_USER:
- pt = (struct xt_entry_target *)target;
- tsize = pt->u.user.target_size;
- if (copy_to_user(*dstptr, pt, tsize)) {
- ret = -EFAULT;
- break;
- }
- tsize -= off;
- if (put_user(tsize, (u_int16_t *)*dstptr))
- ret = -EFAULT;
- *size -= off;
- *dstptr += tsize;
- break;
- case COMPAT_FROM_USER:
- pcompat = (struct compat_xt_entry_target *)target;
- pt = (struct xt_entry_target *)*dstptr;
- tsize = pcompat->u.user.target_size;
- memcpy(pt, pcompat, tsize);
- tsize += off;
- pt->u.user.target_size = tsize;
- *size += off;
- *dstptr += tsize;
- break;
- case COMPAT_CALC_SIZE:
- *size += off;
- break;
- default:
- ret = -ENOPROTOOPT;
- break;
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+ int *size)
+{
+ struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+ int pad, off = xt_compat_target_offset(target);
+ u_int16_t tsize = ct->u.user.target_size;
+
+ t = *dstptr;
+ memcpy(t, ct, sizeof(*ct));
+ if (target->compat_from_user)
+ target->compat_from_user(t->data, ct->data);
+ else
+ memcpy(t->data, ct->data, tsize - sizeof(*ct));
+ pad = XT_ALIGN(target->targetsize) - target->targetsize;
+ if (pad > 0)
+ memset(t->data + target->targetsize, 0, pad);
+
+ tsize += off;
+ t->u.user.target_size = tsize;
+
+ *size += off;
+ *dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
+ int *size)
+{
+ struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target __user *ct = *dstptr;
+ int off = xt_compat_target_offset(target);
+ u_int16_t tsize = t->u.user.target_size - off;
+
+ if (copy_to_user(ct, t, sizeof(*ct)) ||
+ put_user(tsize, &ct->u.user.target_size))
+ return -EFAULT;
+
+ if (target->compat_to_user) {
+ if (target->compat_to_user((void __user *)ct->data, t->data))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+ return -EFAULT;
}
- return ret;
+
+ *size -= off;
+ *dstptr += tsize;
+ return 0;
}
-EXPORT_SYMBOL_GPL(xt_compat_target);
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif
struct xt_table_info *xt_alloc_table_info(unsigned int size)
@@ -516,15 +601,18 @@ int xt_register_table(struct xt_table *table,
{
int ret;
struct xt_table_info *private;
+ struct xt_table *t;
ret = mutex_lock_interruptible(&xt[table->af].mutex);
if (ret != 0)
return ret;
/* Don't autoload: we'd eat our tail... */
- if (list_named_find(&xt[table->af].tables, table->name)) {
- ret = -EEXIST;
- goto unlock;
+ list_for_each_entry(t, &xt[table->af].tables, list) {
+ if (strcmp(t->name, table->name) == 0) {
+ ret = -EEXIST;
+ goto unlock;
+ }
}
/* Simplifies replace_table code. */
@@ -539,7 +627,7 @@ int xt_register_table(struct xt_table *table,
/* save number of initial entries */
private->initial_entries = private->number;
- list_prepend(&xt[table->af].tables, table);
+ list_add(&table->list, &xt[table->af].tables);
ret = 0;
unlock:
@@ -554,7 +642,7 @@ void *xt_unregister_table(struct xt_table *table)
mutex_lock(&xt[table->af].mutex);
private = table->private;
- LIST_DELETE(&xt[table->af].tables, table);
+ list_del(&table->list);
mutex_unlock(&xt[table->af].mutex);
return private;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index e54e57730012..50de965bb104 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -29,8 +29,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct xt_classify_target_info *clinfo = targinfo;
@@ -40,47 +39,41 @@ target(struct sk_buff **pskb,
return XT_CONTINUE;
}
-static struct xt_target classify_reg = {
- .name = "CLASSIFY",
- .target = target,
- .targetsize = sizeof(struct xt_classify_target_info),
- .table = "mangle",
- .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
- (1 << NF_IP_POST_ROUTING),
- .family = AF_INET,
- .me = THIS_MODULE,
+static struct xt_target xt_classify_target[] = {
+ {
+ .family = AF_INET,
+ .name = "CLASSIFY",
+ .target = target,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .table = "mangle",
+ .hooks = (1 << NF_IP_LOCAL_OUT) |
+ (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CLASSIFY",
+ .family = AF_INET6,
+ .target = target,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .table = "mangle",
+ .hooks = (1 << NF_IP_LOCAL_OUT) |
+ (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
};
-static struct xt_target classify6_reg = {
- .name = "CLASSIFY",
- .target = target,
- .targetsize = sizeof(struct xt_classify_target_info),
- .table = "mangle",
- .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
- (1 << NF_IP_POST_ROUTING),
- .family = AF_INET6,
- .me = THIS_MODULE,
-};
-
static int __init xt_classify_init(void)
{
- int ret;
-
- ret = xt_register_target(&classify_reg);
- if (ret)
- return ret;
-
- ret = xt_register_target(&classify6_reg);
- if (ret)
- xt_unregister_target(&classify_reg);
-
- return ret;
+ return xt_register_targets(xt_classify_target,
+ ARRAY_SIZE(xt_classify_target));
}
static void __exit xt_classify_fini(void)
{
- xt_unregister_target(&classify_reg);
- xt_unregister_target(&classify6_reg);
+ xt_unregister_targets(xt_classify_target,
+ ARRAY_SIZE(xt_classify_target));
}
module_init(xt_classify_init);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 60c375d36f01..c01524f817f0 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -38,8 +38,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct xt_connmark_target_info *markinfo = targinfo;
u_int32_t diff;
@@ -49,24 +48,37 @@ target(struct sk_buff **pskb,
u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
if (ctmark) {
- switch(markinfo->mode) {
- case XT_CONNMARK_SET:
- newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
- if (newmark != *ctmark)
- *ctmark = newmark;
- break;
- case XT_CONNMARK_SAVE:
- newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
- if (*ctmark != newmark)
- *ctmark = newmark;
- break;
- case XT_CONNMARK_RESTORE:
- nfmark = (*pskb)->nfmark;
- diff = (*ctmark ^ nfmark) & markinfo->mask;
- if (diff != 0)
- (*pskb)->nfmark = nfmark ^ diff;
- break;
- }
+ switch(markinfo->mode) {
+ case XT_CONNMARK_SET:
+ newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+ if (newmark != *ctmark) {
+ *ctmark = newmark;
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+ ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+ nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+ }
+ break;
+ case XT_CONNMARK_SAVE:
+ newmark = (*ctmark & ~markinfo->mask) |
+ ((*pskb)->nfmark & markinfo->mask);
+ if (*ctmark != newmark) {
+ *ctmark = newmark;
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+ ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+ nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+ }
+ break;
+ case XT_CONNMARK_RESTORE:
+ nfmark = (*pskb)->nfmark;
+ diff = (*ctmark ^ nfmark) & markinfo->mask;
+ if (diff != 0)
+ (*pskb)->nfmark = nfmark ^ diff;
+ break;
+ }
}
return XT_CONTINUE;
@@ -77,65 +89,91 @@ checkentry(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct xt_connmark_target_info *matchinfo = targinfo;
if (matchinfo->mode == XT_CONNMARK_RESTORE) {
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "CONNMARK: restore can only be "
+ "called from \"mangle\" table, not \"%s\"\n",
+ tablename);
+ return 0;
+ }
}
-
if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
return 0;
}
-
return 1;
}
-static struct xt_target connmark_reg = {
- .name = "CONNMARK",
- .target = target,
- .targetsize = sizeof(struct xt_connmark_target_info),
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_target_info {
+ compat_ulong_t mark, mask;
+ u_int8_t mode;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
};
-static struct xt_target connmark6_reg = {
- .name = "CONNMARK",
- .target = target,
- .targetsize = sizeof(struct xt_connmark_target_info),
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_xt_connmark_target_info *cm = src;
+ struct xt_connmark_target_info m = {
+ .mark = cm->mark,
+ .mask = cm->mask,
+ .mode = cm->mode,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ struct xt_connmark_target_info *m = src;
+ struct compat_xt_connmark_target_info cm = {
+ .mark = m->mark,
+ .mask = m->mask,
+ .mode = m->mode,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target xt_connmark_target[] = {
+ {
+ .name = "CONNMARK",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_connmark_target_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_connmark_target_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
+ .me = THIS_MODULE
+ },
+ {
+ .name = "CONNMARK",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_connmark_target_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_connmark_init(void)
{
- int ret;
-
need_conntrack();
-
- ret = xt_register_target(&connmark_reg);
- if (ret)
- return ret;
-
- ret = xt_register_target(&connmark6_reg);
- if (ret)
- xt_unregister_target(&connmark_reg);
-
- return ret;
+ return xt_register_targets(xt_connmark_target,
+ ARRAY_SIZE(xt_connmark_target));
}
static void __exit xt_connmark_fini(void)
{
- xt_unregister_target(&connmark_reg);
- xt_unregister_target(&connmark6_reg);
+ xt_unregister_targets(xt_connmark_target,
+ ARRAY_SIZE(xt_connmark_target));
}
module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 8c011e020769..467386266674 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb)
static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct sk_buff *skb = *pskb;
const struct xt_connsecmark_target_info *info = targinfo;
@@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
static int checkentry(const char *tablename, const void *entry,
const struct xt_target *target, void *targinfo,
- unsigned int targinfosize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
struct xt_connsecmark_target_info *info = targinfo;
@@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry,
return 1;
}
-static struct xt_target ipt_connsecmark_reg = {
- .name = "CONNSECMARK",
- .target = target,
- .targetsize = sizeof(struct xt_connsecmark_target_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
- .family = AF_INET,
- .revision = 0,
-};
-
-static struct xt_target ip6t_connsecmark_reg = {
- .name = "CONNSECMARK",
- .target = target,
- .targetsize = sizeof(struct xt_connsecmark_target_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
- .family = AF_INET6,
- .revision = 0,
+static struct xt_target xt_connsecmark_target[] = {
+ {
+ .name = "CONNSECMARK",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CONNSECMARK",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
};
static int __init xt_connsecmark_init(void)
{
- int err;
-
need_conntrack();
-
- err = xt_register_target(&ipt_connsecmark_reg);
- if (err)
- return err;
-
- err = xt_register_target(&ip6t_connsecmark_reg);
- if (err)
- xt_unregister_target(&ipt_connsecmark_reg);
-
- return err;
+ return xt_register_targets(xt_connsecmark_target,
+ ARRAY_SIZE(xt_connsecmark_target));
}
static void __exit xt_connsecmark_fini(void)
{
- xt_unregister_target(&ip6t_connsecmark_reg);
- xt_unregister_target(&ipt_connsecmark_reg);
+ xt_unregister_targets(xt_connsecmark_target,
+ ARRAY_SIZE(xt_connsecmark_target));
}
module_init(xt_connsecmark_init);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 000000000000..a7cc75aeb38d
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,118 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP modification module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_DSCP");
+MODULE_ALIAS("ip6t_DSCP");
+
+static unsigned int target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const struct xt_target *target,
+ const void *targinfo)
+{
+ const struct xt_DSCP_info *dinfo = targinfo;
+ u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+
+ if (dscp != dinfo->dscp) {
+ if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ return NF_DROP;
+
+ ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+ dinfo->dscp << XT_DSCP_SHIFT);
+
+ }
+ return XT_CONTINUE;
+}
+
+static unsigned int target6(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const struct xt_target *target,
+ const void *targinfo)
+{
+ const struct xt_DSCP_info *dinfo = targinfo;
+ u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+ if (dscp != dinfo->dscp) {
+ if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+ return NF_DROP;
+
+ ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+ dinfo->dscp << XT_DSCP_SHIFT);
+ }
+ return XT_CONTINUE;
+}
+
+static int checkentry(const char *tablename,
+ const void *e_void,
+ const struct xt_target *target,
+ void *targinfo,
+ unsigned int hook_mask)
+{
+ const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
+
+ if ((dscp > XT_DSCP_MAX)) {
+ printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+ return 0;
+ }
+ return 1;
+}
+
+static struct xt_target xt_dscp_target[] = {
+ {
+ .name = "DSCP",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DSCP",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .target = target6,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init xt_dscp_target_init(void)
+{
+ return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+}
+
+static void __exit xt_dscp_target_fini(void)
+{
+ xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+}
+
+module_init(xt_dscp_target_init);
+module_exit(xt_dscp_target_fini);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index ee9c34edc76c..c6e860a7114f 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct xt_mark_target_info *markinfo = targinfo;
@@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct xt_mark_target_info_v1 *markinfo = targinfo;
int mark = 0;
@@ -76,7 +74,6 @@ checkentry_v0(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct xt_mark_target_info *markinfo = targinfo;
@@ -93,7 +90,6 @@ checkentry_v1(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct xt_mark_target_info_v1 *markinfo = targinfo;
@@ -112,65 +108,81 @@ checkentry_v1(const char *tablename,
return 1;
}
-static struct xt_target ipt_mark_reg_v0 = {
- .name = "MARK",
- .target = target_v0,
- .targetsize = sizeof(struct xt_mark_target_info),
- .table = "mangle",
- .checkentry = checkentry_v0,
- .me = THIS_MODULE,
- .family = AF_INET,
- .revision = 0,
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_target_info_v1 {
+ compat_ulong_t mark;
+ u_int8_t mode;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
};
-static struct xt_target ipt_mark_reg_v1 = {
- .name = "MARK",
- .target = target_v1,
- .targetsize = sizeof(struct xt_mark_target_info_v1),
- .table = "mangle",
- .checkentry = checkentry_v1,
- .me = THIS_MODULE,
- .family = AF_INET,
- .revision = 1,
-};
+static void compat_from_user_v1(void *dst, void *src)
+{
+ struct compat_xt_mark_target_info_v1 *cm = src;
+ struct xt_mark_target_info_v1 m = {
+ .mark = cm->mark,
+ .mode = cm->mode,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
-static struct xt_target ip6t_mark_reg_v0 = {
- .name = "MARK",
- .target = target_v0,
- .targetsize = sizeof(struct xt_mark_target_info),
- .table = "mangle",
- .checkentry = checkentry_v0,
- .me = THIS_MODULE,
- .family = AF_INET6,
- .revision = 0,
+static int compat_to_user_v1(void __user *dst, void *src)
+{
+ struct xt_mark_target_info_v1 *m = src;
+ struct compat_xt_mark_target_info_v1 cm = {
+ .mark = m->mark,
+ .mode = m->mode,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target xt_mark_target[] = {
+ {
+ .name = "MARK",
+ .family = AF_INET,
+ .revision = 0,
+ .checkentry = checkentry_v0,
+ .target = target_v0,
+ .targetsize = sizeof(struct xt_mark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "MARK",
+ .family = AF_INET,
+ .revision = 1,
+ .checkentry = checkentry_v1,
+ .target = target_v1,
+ .targetsize = sizeof(struct xt_mark_target_info_v1),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
+ .compat_from_user = compat_from_user_v1,
+ .compat_to_user = compat_to_user_v1,
+#endif
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "MARK",
+ .family = AF_INET6,
+ .revision = 0,
+ .checkentry = checkentry_v0,
+ .target = target_v0,
+ .targetsize = sizeof(struct xt_mark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
};
static int __init xt_mark_init(void)
{
- int err;
-
- err = xt_register_target(&ipt_mark_reg_v0);
- if (err)
- return err;
-
- err = xt_register_target(&ipt_mark_reg_v1);
- if (err)
- xt_unregister_target(&ipt_mark_reg_v0);
-
- err = xt_register_target(&ip6t_mark_reg_v0);
- if (err) {
- xt_unregister_target(&ipt_mark_reg_v0);
- xt_unregister_target(&ipt_mark_reg_v1);
- }
-
- return err;
+ return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
}
static void __exit xt_mark_fini(void)
{
- xt_unregister_target(&ipt_mark_reg_v0);
- xt_unregister_target(&ipt_mark_reg_v1);
- xt_unregister_target(&ip6t_mark_reg_v0);
+ xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
}
module_init(xt_mark_init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 86ccceb61fdd..db9b896e57c8 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -29,65 +29,46 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct xt_NFQ_info *tinfo = targinfo;
return NF_QUEUE_NR(tinfo->queuenum);
}
-static struct xt_target ipt_NFQ_reg = {
- .name = "NFQUEUE",
- .target = target,
- .targetsize = sizeof(struct xt_NFQ_info),
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_target ip6t_NFQ_reg = {
- .name = "NFQUEUE",
- .target = target,
- .targetsize = sizeof(struct xt_NFQ_info),
- .family = AF_INET6,
- .me = THIS_MODULE,
-};
-
-static struct xt_target arpt_NFQ_reg = {
- .name = "NFQUEUE",
- .target = target,
- .targetsize = sizeof(struct xt_NFQ_info),
- .family = NF_ARP,
- .me = THIS_MODULE,
+static struct xt_target xt_nfqueue_target[] = {
+ {
+ .name = "NFQUEUE",
+ .family = AF_INET,
+ .target = target,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .family = AF_INET6,
+ .target = target,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .family = NF_ARP,
+ .target = target,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_nfqueue_init(void)
{
- int ret;
- ret = xt_register_target(&ipt_NFQ_reg);
- if (ret)
- return ret;
- ret = xt_register_target(&ip6t_NFQ_reg);
- if (ret)
- goto out_ip;
- ret = xt_register_target(&arpt_NFQ_reg);
- if (ret)
- goto out_ip6;
-
- return ret;
-out_ip6:
- xt_unregister_target(&ip6t_NFQ_reg);
-out_ip:
- xt_unregister_target(&ipt_NFQ_reg);
-
- return ret;
+ return xt_register_targets(xt_nfqueue_target,
+ ARRAY_SIZE(xt_nfqueue_target));
}
static void __exit xt_nfqueue_fini(void)
{
- xt_unregister_target(&arpt_NFQ_reg);
- xt_unregister_target(&ip6t_NFQ_reg);
- xt_unregister_target(&ipt_NFQ_reg);
+ xt_register_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target));
}
module_init(xt_nfqueue_init);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 98f4b5363ce8..6d00dcaed238 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -16,8 +16,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
/* Previously seen (loopback)? Ignore. */
if ((*pskb)->nfct != NULL)
@@ -34,43 +33,32 @@ target(struct sk_buff **pskb,
return XT_CONTINUE;
}
-static struct xt_target notrack_reg = {
- .name = "NOTRACK",
- .target = target,
- .targetsize = 0,
- .table = "raw",
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_target notrack6_reg = {
- .name = "NOTRACK",
- .target = target,
- .targetsize = 0,
- .table = "raw",
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_target xt_notrack_target[] = {
+ {
+ .name = "NOTRACK",
+ .family = AF_INET,
+ .target = target,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NOTRACK",
+ .family = AF_INET6,
+ .target = target,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
};
static int __init xt_notrack_init(void)
{
- int ret;
-
- ret = xt_register_target(&notrack_reg);
- if (ret)
- return ret;
-
- ret = xt_register_target(&notrack6_reg);
- if (ret)
- xt_unregister_target(&notrack_reg);
-
- return ret;
+ return xt_register_targets(xt_notrack_target,
+ ARRAY_SIZE(xt_notrack_target));
}
static void __exit xt_notrack_fini(void)
{
- xt_unregister_target(&notrack6_reg);
- xt_unregister_target(&notrack_reg);
+ xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target));
}
module_init(xt_notrack_init);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c2ce9c4011cc..add752196290 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -31,7 +31,7 @@ static u8 mode;
static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
u32 secmark = 0;
const struct xt_secmark_target_info *info = targinfo;
@@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info)
{
int err;
struct xt_secmark_target_selinux_info *sel = &info->u.sel;
+
+ sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0';
err = selinux_string_to_sid(sel->selctx, &sel->selsid);
if (err) {
@@ -83,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info)
static int checkentry(const char *tablename, const void *entry,
const struct xt_target *target, void *targinfo,
- unsigned int targinfosize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
struct xt_secmark_target_info *info = targinfo;
@@ -109,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry,
return 1;
}
-static struct xt_target ipt_secmark_reg = {
- .name = "SECMARK",
- .target = target,
- .targetsize = sizeof(struct xt_secmark_target_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
- .family = AF_INET,
- .revision = 0,
-};
-
-static struct xt_target ip6t_secmark_reg = {
- .name = "SECMARK",
- .target = target,
- .targetsize = sizeof(struct xt_secmark_target_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
- .family = AF_INET6,
- .revision = 0,
+static struct xt_target xt_secmark_target[] = {
+ {
+ .name = "SECMARK",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .target = target,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
};
static int __init xt_secmark_init(void)
{
- int err;
-
- err = xt_register_target(&ipt_secmark_reg);
- if (err)
- return err;
-
- err = xt_register_target(&ip6t_secmark_reg);
- if (err)
- xt_unregister_target(&ipt_secmark_reg);
-
- return err;
+ return xt_register_targets(xt_secmark_target,
+ ARRAY_SIZE(xt_secmark_target));
}
static void __exit xt_secmark_fini(void)
{
- xt_unregister_target(&ip6t_secmark_reg);
- xt_unregister_target(&ipt_secmark_reg);
+ xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target));
}
module_init(xt_secmark_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 197609cb06d7..7db492d65220 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -29,41 +29,32 @@ match(const struct sk_buff *skb,
return 1;
}
-static struct xt_match comment_match = {
- .name = "comment",
- .match = match,
- .matchsize = sizeof(struct xt_comment_info),
- .family = AF_INET,
- .me = THIS_MODULE
-};
-
-static struct xt_match comment6_match = {
- .name = "comment",
- .match = match,
- .matchsize = sizeof(struct xt_comment_info),
- .family = AF_INET6,
- .me = THIS_MODULE
+static struct xt_match xt_comment_match[] = {
+ {
+ .name = "comment",
+ .family = AF_INET,
+ .match = match,
+ .matchsize = sizeof(struct xt_comment_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "comment",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct xt_comment_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_comment_init(void)
{
- int ret;
-
- ret = xt_register_match(&comment_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&comment6_match);
- if (ret)
- xt_unregister_match(&comment_match);
-
- return ret;
+ return xt_register_matches(xt_comment_match,
+ ARRAY_SIZE(xt_comment_match));
}
static void __exit xt_comment_fini(void)
{
- xt_unregister_match(&comment_match);
- xt_unregister_match(&comment6_match);
+ xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match));
}
module_init(xt_comment_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1396fe2d07c1..dcc497ea8183 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -125,7 +125,6 @@ static int check(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_connbytes_info *sinfo = matchinfo;
@@ -143,40 +142,35 @@ static int check(const char *tablename,
return 1;
}
-static struct xt_match connbytes_match = {
- .name = "connbytes",
- .match = match,
- .checkentry = check,
- .matchsize = sizeof(struct xt_connbytes_info),
- .family = AF_INET,
- .me = THIS_MODULE
-};
-static struct xt_match connbytes6_match = {
- .name = "connbytes",
- .match = match,
- .checkentry = check,
- .matchsize = sizeof(struct xt_connbytes_info),
- .family = AF_INET6,
- .me = THIS_MODULE
+static struct xt_match xt_connbytes_match[] = {
+ {
+ .name = "connbytes",
+ .family = AF_INET,
+ .checkentry = check,
+ .match = match,
+ .matchsize = sizeof(struct xt_connbytes_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "connbytes",
+ .family = AF_INET6,
+ .checkentry = check,
+ .match = match,
+ .matchsize = sizeof(struct xt_connbytes_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_connbytes_init(void)
{
- int ret;
- ret = xt_register_match(&connbytes_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&connbytes6_match);
- if (ret)
- xt_unregister_match(&connbytes_match);
- return ret;
+ return xt_register_matches(xt_connbytes_match,
+ ARRAY_SIZE(xt_connbytes_match));
}
static void __exit xt_connbytes_fini(void)
{
- xt_unregister_match(&connbytes_match);
- xt_unregister_match(&connbytes6_match);
+ xt_unregister_matches(xt_connbytes_match,
+ ARRAY_SIZE(xt_connbytes_match));
}
module_init(xt_connbytes_init);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 56324c8aff0a..92a5726ef237 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -55,7 +55,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct xt_connmark_info *cm = matchinfo;
@@ -75,53 +74,80 @@ checkentry(const char *tablename,
}
static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
#endif
}
-static struct xt_match connmark_match = {
- .name = "connmark",
- .match = match,
- .matchsize = sizeof(struct xt_connmark_info),
- .checkentry = checkentry,
- .destroy = destroy,
- .family = AF_INET,
- .me = THIS_MODULE
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_info {
+ compat_ulong_t mark, mask;
+ u_int8_t invert;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
};
-static struct xt_match connmark6_match = {
- .name = "connmark",
- .match = match,
- .matchsize = sizeof(struct xt_connmark_info),
- .checkentry = checkentry,
- .destroy = destroy,
- .family = AF_INET6,
- .me = THIS_MODULE
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_xt_connmark_info *cm = src;
+ struct xt_connmark_info m = {
+ .mark = cm->mark,
+ .mask = cm->mask,
+ .invert = cm->invert,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ struct xt_connmark_info *m = src;
+ struct compat_xt_connmark_info cm = {
+ .mark = m->mark,
+ .mask = m->mask,
+ .invert = m->invert,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_match xt_connmark_match[] = {
+ {
+ .name = "connmark",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_connmark_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_connmark_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
+ .me = THIS_MODULE
+ },
+ {
+ .name = "connmark",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_connmark_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_connmark_init(void)
{
- int ret;
-
need_conntrack();
-
- ret = xt_register_match(&connmark_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&connmark6_match);
- if (ret)
- xt_unregister_match(&connmark_match);
- return ret;
+ return xt_register_matches(xt_connmark_match,
+ ARRAY_SIZE(xt_connmark_match));
}
static void __exit xt_connmark_fini(void)
{
- xt_unregister_match(&connmark6_match);
- xt_unregister_match(&connmark_match);
+ xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match));
}
module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 145489a4c3f2..0ea501a2fda5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -45,7 +45,7 @@ match(const struct sk_buff *skb,
ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
if (ct == &ip_conntrack_untracked)
statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -54,63 +54,72 @@ match(const struct sk_buff *skb,
else
statebit = XT_CONNTRACK_STATE_INVALID;
- if(sinfo->flags & XT_CONNTRACK_STATE) {
+ if (sinfo->flags & XT_CONNTRACK_STATE) {
if (ct) {
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
+ if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
statebit |= XT_CONNTRACK_STATE_SNAT;
-
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
+ if (test_bit(IPS_DST_NAT_BIT, &ct->status))
statebit |= XT_CONNTRACK_STATE_DNAT;
}
-
- if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
- return 0;
- }
-
- if(sinfo->flags & XT_CONNTRACK_PROTO) {
- if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
- return 0;
- }
-
- if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
+ if (FWINV((statebit & sinfo->statemask) == 0,
+ XT_CONNTRACK_STATE))
return 0;
}
- if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
+ if (ct == NULL) {
+ if (sinfo->flags & ~XT_CONNTRACK_STATE)
return 0;
+ return 1;
}
- if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_PROTO &&
+ FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+ XT_CONNTRACK_PROTO))
+ return 0;
+
+ if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
+ sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+ XT_CONNTRACK_ORIGSRC))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_REPLDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
+ sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+ XT_CONNTRACK_ORIGDST))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_STATUS) {
- if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
+ sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+ XT_CONNTRACK_REPLSRC))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires;
+ if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
+ sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+ XT_CONNTRACK_REPLDST))
+ return 0;
- if(!ct)
- return 0;
+ if (sinfo->flags & XT_CONNTRACK_STATUS &&
+ FWINV((ct->status & sinfo->statusmask) == 0,
+ XT_CONNTRACK_STATUS))
+ return 0;
- expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+ if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
+ unsigned long expires = timer_pending(&ct->timeout) ?
+ (ct->timeout.expires - jiffies)/HZ : 0;
- if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
+ if (FWINV(!(expires >= sinfo->expires_min &&
+ expires <= sinfo->expires_max),
+ XT_CONNTRACK_EXPIRES))
return 0;
}
-
return 1;
}
@@ -141,63 +150,72 @@ match(const struct sk_buff *skb,
else
statebit = XT_CONNTRACK_STATE_INVALID;
- if(sinfo->flags & XT_CONNTRACK_STATE) {
+ if (sinfo->flags & XT_CONNTRACK_STATE) {
if (ct) {
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+ if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
statebit |= XT_CONNTRACK_STATE_SNAT;
-
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+ if (test_bit(IPS_DST_NAT_BIT, &ct->status))
statebit |= XT_CONNTRACK_STATE_DNAT;
}
-
- if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
- return 0;
- }
-
- if(sinfo->flags & XT_CONNTRACK_PROTO) {
- if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
- return 0;
- }
-
- if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
+ if (FWINV((statebit & sinfo->statemask) == 0,
+ XT_CONNTRACK_STATE))
return 0;
}
- if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
+ if (ct == NULL) {
+ if (sinfo->flags & ~XT_CONNTRACK_STATE)
return 0;
+ return 1;
}
- if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_PROTO &&
+ FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+ XT_CONNTRACK_PROTO))
+ return 0;
+
+ if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
+ sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+ XT_CONNTRACK_ORIGSRC))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_REPLDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
+ sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+ XT_CONNTRACK_ORIGDST))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_STATUS) {
- if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
- return 0;
- }
+ if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
+ sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+ XT_CONNTRACK_REPLSRC))
+ return 0;
- if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires;
+ if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
+ sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+ XT_CONNTRACK_REPLDST))
+ return 0;
- if(!ct)
- return 0;
+ if (sinfo->flags & XT_CONNTRACK_STATUS &&
+ FWINV((ct->status & sinfo->statusmask) == 0,
+ XT_CONNTRACK_STATUS))
+ return 0;
- expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+ if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
+ unsigned long expires = timer_pending(&ct->timeout) ?
+ (ct->timeout.expires - jiffies)/HZ : 0;
- if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
+ if (FWINV(!(expires >= sinfo->expires_min &&
+ expires <= sinfo->expires_max),
+ XT_CONNTRACK_EXPIRES))
return 0;
}
-
return 1;
}
@@ -208,7 +226,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -221,8 +238,7 @@ checkentry(const char *tablename,
return 1;
}
-static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+static void destroy(const struct xt_match *match, void *matchinfo)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
@@ -241,11 +257,8 @@ static struct xt_match conntrack_match = {
static int __init xt_conntrack_init(void)
{
- int ret;
need_conntrack();
- ret = xt_register_match(&conntrack_match);
-
- return ret;
+ return xt_register_match(&conntrack_match);
}
static void __exit xt_conntrack_fini(void)
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2e2f825dad4c..3e6cf430e518 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -131,7 +131,6 @@ checkentry(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_dccp_info *info = matchinfo;
@@ -141,27 +140,26 @@ checkentry(const char *tablename,
&& !(info->invflags & ~info->flags);
}
-static struct xt_match dccp_match =
-{
- .name = "dccp",
- .match = match,
- .matchsize = sizeof(struct xt_dccp_info),
- .proto = IPPROTO_DCCP,
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
+static struct xt_match xt_dccp_match[] = {
+ {
+ .name = "dccp",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dccp",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
};
-static struct xt_match dccp6_match =
-{
- .name = "dccp",
- .match = match,
- .matchsize = sizeof(struct xt_dccp_info),
- .proto = IPPROTO_DCCP,
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
-};
-
static int __init xt_dccp_init(void)
{
@@ -173,27 +171,19 @@ static int __init xt_dccp_init(void)
dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
if (!dccp_optbuf)
return -ENOMEM;
- ret = xt_register_match(&dccp_match);
+ ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
if (ret)
goto out_kfree;
- ret = xt_register_match(&dccp6_match);
- if (ret)
- goto out_unreg;
-
return ret;
-out_unreg:
- xt_unregister_match(&dccp_match);
out_kfree:
kfree(dccp_optbuf);
-
return ret;
}
static void __exit xt_dccp_fini(void)
{
- xt_unregister_match(&dccp6_match);
- xt_unregister_match(&dccp_match);
+ xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
kfree(dccp_optbuf);
}
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 000000000000..26c7f4ad102a
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,103 @@
+/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
+ *
+ * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/xt_dscp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP matching module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_dscp");
+MODULE_ALIAS("ip6t_dscp");
+
+static int match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct xt_match *match,
+ const void *matchinfo,
+ int offset,
+ unsigned int protoff,
+ int *hotdrop)
+{
+ const struct xt_dscp_info *info = matchinfo;
+ u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+
+ return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int match6(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct xt_match *match,
+ const void *matchinfo,
+ int offset,
+ unsigned int protoff,
+ int *hotdrop)
+{
+ const struct xt_dscp_info *info = matchinfo;
+ u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+ return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int checkentry(const char *tablename,
+ const void *info,
+ const struct xt_match *match,
+ void *matchinfo,
+ unsigned int hook_mask)
+{
+ const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
+
+ if (dscp > XT_DSCP_MAX) {
+ printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct xt_match xt_dscp_match[] = {
+ {
+ .name = "dscp",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dscp",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match6,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init xt_dscp_match_init(void)
+{
+ return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+}
+
+static void __exit xt_dscp_match_fini(void)
+{
+ xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+}
+
+module_init(xt_dscp_match_init);
+module_exit(xt_dscp_match_fini);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 9dad6281e0c1..7c95f149d942 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -79,7 +79,6 @@ checkentry(const char *tablename,
const void *ip_void,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct xt_esp *espinfo = matchinfo;
@@ -92,44 +91,35 @@ checkentry(const char *tablename,
return 1;
}
-static struct xt_match esp_match = {
- .name = "esp",
- .family = AF_INET,
- .proto = IPPROTO_ESP,
- .match = &match,
- .matchsize = sizeof(struct xt_esp),
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static struct xt_match esp6_match = {
- .name = "esp",
- .family = AF_INET6,
- .proto = IPPROTO_ESP,
- .match = &match,
- .matchsize = sizeof(struct xt_esp),
- .checkentry = &checkentry,
- .me = THIS_MODULE,
+static struct xt_match xt_esp_match[] = {
+ {
+ .name = "esp",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "esp",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
};
static int __init xt_esp_init(void)
{
- int ret;
- ret = xt_register_match(&esp_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&esp6_match);
- if (ret)
- xt_unregister_match(&esp_match);
-
- return ret;
+ return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
}
static void __exit xt_esp_cleanup(void)
{
- xt_unregister_match(&esp_match);
- xt_unregister_match(&esp6_match);
+ xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
}
module_init(xt_esp_init);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 799c2a43e3b9..5d7818b73e3a 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -139,7 +139,6 @@ static int check(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct xt_helper_info *info = matchinfo;
@@ -156,52 +155,44 @@ static int check(const char *tablename,
}
static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
#endif
}
-static struct xt_match helper_match = {
- .name = "helper",
- .match = match,
- .matchsize = sizeof(struct xt_helper_info),
- .checkentry = check,
- .destroy = destroy,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-static struct xt_match helper6_match = {
- .name = "helper",
- .match = match,
- .matchsize = sizeof(struct xt_helper_info),
- .checkentry = check,
- .destroy = destroy,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_helper_match[] = {
+ {
+ .name = "helper",
+ .family = AF_INET,
+ .checkentry = check,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_helper_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "helper",
+ .family = AF_INET6,
+ .checkentry = check,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_helper_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_helper_init(void)
{
- int ret;
need_conntrack();
-
- ret = xt_register_match(&helper_match);
- if (ret < 0)
- return ret;
-
- ret = xt_register_match(&helper6_match);
- if (ret < 0)
- xt_unregister_match(&helper_match);
-
- return ret;
+ return xt_register_matches(xt_helper_match,
+ ARRAY_SIZE(xt_helper_match));
}
static void __exit xt_helper_fini(void)
{
- xt_unregister_match(&helper_match);
- xt_unregister_match(&helper6_match);
+ xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match));
}
module_init(xt_helper_init);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 109132c9a146..67fd30d9f303 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -52,39 +52,32 @@ match6(const struct sk_buff *skb,
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
-static struct xt_match length_match = {
- .name = "length",
- .match = match,
- .matchsize = sizeof(struct xt_length_info),
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match length6_match = {
- .name = "length",
- .match = match6,
- .matchsize = sizeof(struct xt_length_info),
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_length_match[] = {
+ {
+ .name = "length",
+ .family = AF_INET,
+ .match = match,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "length",
+ .family = AF_INET6,
+ .match = match6,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_length_init(void)
{
- int ret;
- ret = xt_register_match(&length_match);
- if (ret)
- return ret;
- ret = xt_register_match(&length6_match);
- if (ret)
- xt_unregister_match(&length_match);
-
- return ret;
+ return xt_register_matches(xt_length_match,
+ ARRAY_SIZE(xt_length_match));
}
static void __exit xt_length_fini(void)
{
- xt_unregister_match(&length_match);
- xt_unregister_match(&length6_match);
+ xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match));
}
module_init(xt_length_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index ce7fdb7e4e07..fda7b7dec27d 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct xt_rateinfo *r = matchinfo;
@@ -123,55 +122,95 @@ ipt_limit_checkentry(const char *tablename,
return 0;
}
- /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
- 128. */
- r->prev = jiffies;
- r->credit = user2credits(r->avg * r->burst); /* Credits full. */
- r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
- r->cost = user2credits(r->avg);
-
/* For SMP, we only want to use one set of counters. */
r->master = r;
-
+ if (r->cost == 0) {
+ /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
+ 128. */
+ r->prev = jiffies;
+ r->credit = user2credits(r->avg * r->burst); /* Credits full. */
+ r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
+ r->cost = user2credits(r->avg);
+ }
return 1;
}
-static struct xt_match ipt_limit_reg = {
- .name = "limit",
- .match = ipt_limit_match,
- .matchsize = sizeof(struct xt_rateinfo),
- .checkentry = ipt_limit_checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+ u_int32_t avg;
+ u_int32_t burst;
+
+ compat_ulong_t prev;
+ u_int32_t credit;
+ u_int32_t credit_cap, cost;
+
+ u_int32_t master;
};
-static struct xt_match limit6_reg = {
- .name = "limit",
- .match = ipt_limit_match,
- .matchsize = sizeof(struct xt_rateinfo),
- .checkentry = ipt_limit_checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
+
+/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
+ * master pointer, which does not need to be preserved. */
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_xt_rateinfo *cm = src;
+ struct xt_rateinfo m = {
+ .avg = cm->avg,
+ .burst = cm->burst,
+ .prev = cm->prev | (unsigned long)cm->master << 32,
+ .credit = cm->credit,
+ .credit_cap = cm->credit_cap,
+ .cost = cm->cost,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ struct xt_rateinfo *m = src;
+ struct compat_xt_rateinfo cm = {
+ .avg = m->avg,
+ .burst = m->burst,
+ .prev = m->prev,
+ .credit = m->credit,
+ .credit_cap = m->credit_cap,
+ .cost = m->cost,
+ .master = m->prev >> 32,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_match xt_limit_match[] = {
+ {
+ .name = "limit",
+ .family = AF_INET,
+ .checkentry = ipt_limit_checkentry,
+ .match = ipt_limit_match,
+ .matchsize = sizeof(struct xt_rateinfo),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_rateinfo),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "limit",
+ .family = AF_INET6,
+ .checkentry = ipt_limit_checkentry,
+ .match = ipt_limit_match,
+ .matchsize = sizeof(struct xt_rateinfo),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_limit_init(void)
{
- int ret;
-
- ret = xt_register_match(&ipt_limit_reg);
- if (ret)
- return ret;
-
- ret = xt_register_match(&limit6_reg);
- if (ret)
- xt_unregister_match(&ipt_limit_reg);
-
- return ret;
+ return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
}
static void __exit xt_limit_fini(void)
{
- xt_unregister_match(&ipt_limit_reg);
- xt_unregister_match(&limit6_reg);
+ xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
}
module_init(xt_limit_init);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 356290ffe386..425fc21e31f5 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -43,43 +43,37 @@ match(const struct sk_buff *skb,
^ info->invert));
}
-static struct xt_match mac_match = {
- .name = "mac",
- .match = match,
- .matchsize = sizeof(struct xt_mac_info),
- .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
- (1 << NF_IP_FORWARD),
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-static struct xt_match mac6_match = {
- .name = "mac",
- .match = match,
- .matchsize = sizeof(struct xt_mac_info),
- .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
- (1 << NF_IP_FORWARD),
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_mac_match[] = {
+ {
+ .name = "mac",
+ .family = AF_INET,
+ .match = match,
+ .matchsize = sizeof(struct xt_mac_info),
+ .hooks = (1 << NF_IP_PRE_ROUTING) |
+ (1 << NF_IP_LOCAL_IN) |
+ (1 << NF_IP_FORWARD),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "mac",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct xt_mac_info),
+ .hooks = (1 << NF_IP_PRE_ROUTING) |
+ (1 << NF_IP_LOCAL_IN) |
+ (1 << NF_IP_FORWARD),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_mac_init(void)
{
- int ret;
- ret = xt_register_match(&mac_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&mac6_match);
- if (ret)
- xt_unregister_match(&mac_match);
-
- return ret;
+ return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
}
static void __exit xt_mac_fini(void)
{
- xt_unregister_match(&mac_match);
- xt_unregister_match(&mac6_match);
+ xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
}
module_init(xt_mac_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 876bc5797738..934dddfbcd23 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -39,7 +39,6 @@ checkentry(const char *tablename,
const void *entry,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_mark_info *minfo = matchinfo;
@@ -51,42 +50,69 @@ checkentry(const char *tablename,
return 1;
}
-static struct xt_match mark_match = {
- .name = "mark",
- .match = match,
- .matchsize = sizeof(struct xt_mark_info),
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_info {
+ compat_ulong_t mark, mask;
+ u_int8_t invert;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
};
-static struct xt_match mark6_match = {
- .name = "mark",
- .match = match,
- .matchsize = sizeof(struct xt_mark_info),
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
-};
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_xt_mark_info *cm = src;
+ struct xt_mark_info m = {
+ .mark = cm->mark,
+ .mask = cm->mask,
+ .invert = cm->invert,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
-static int __init xt_mark_init(void)
+static int compat_to_user(void __user *dst, void *src)
{
- int ret;
- ret = xt_register_match(&mark_match);
- if (ret)
- return ret;
+ struct xt_mark_info *m = src;
+ struct compat_xt_mark_info cm = {
+ .mark = m->mark,
+ .mask = m->mask,
+ .invert = m->invert,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
- ret = xt_register_match(&mark6_match);
- if (ret)
- xt_unregister_match(&mark_match);
+static struct xt_match xt_mark_match[] = {
+ {
+ .name = "mark",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_mark_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_mark_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "mark",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_mark_info),
+ .me = THIS_MODULE,
+ },
+};
- return ret;
+static int __init xt_mark_init(void)
+{
+ return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
}
static void __exit xt_mark_fini(void)
{
- xt_unregister_match(&mark_match);
- xt_unregister_match(&mark6_match);
+ xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
}
module_init(xt_mark_init);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1ff0a25396e7..d3aefd380930 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -176,7 +176,6 @@ checkentry(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_ip *ip = info;
@@ -191,7 +190,6 @@ checkentry_v1(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_ip *ip = info;
@@ -206,7 +204,6 @@ checkentry6(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ip6t_ip6 *ip = info;
@@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ip6t_ip6 *ip = info;
@@ -231,84 +227,55 @@ checkentry6_v1(const char *tablename,
multiinfo->count);
}
-static struct xt_match multiport_match = {
- .name = "multiport",
- .revision = 0,
- .matchsize = sizeof(struct xt_multiport),
- .match = &match,
- .checkentry = &checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match multiport_match_v1 = {
- .name = "multiport",
- .revision = 1,
- .matchsize = sizeof(struct xt_multiport_v1),
- .match = &match_v1,
- .checkentry = &checkentry_v1,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match multiport6_match = {
- .name = "multiport",
- .revision = 0,
- .matchsize = sizeof(struct xt_multiport),
- .match = &match,
- .checkentry = &checkentry6,
- .family = AF_INET6,
- .me = THIS_MODULE,
-};
-
-static struct xt_match multiport6_match_v1 = {
- .name = "multiport",
- .revision = 1,
- .matchsize = sizeof(struct xt_multiport_v1),
- .match = &match_v1,
- .checkentry = &checkentry6_v1,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_multiport_match[] = {
+ {
+ .name = "multiport",
+ .family = AF_INET,
+ .revision = 0,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_multiport),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = AF_INET,
+ .revision = 1,
+ .checkentry = checkentry_v1,
+ .match = match_v1,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = AF_INET6,
+ .revision = 0,
+ .checkentry = checkentry6,
+ .match = match,
+ .matchsize = sizeof(struct xt_multiport),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = AF_INET6,
+ .revision = 1,
+ .checkentry = checkentry6_v1,
+ .match = match_v1,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_multiport_init(void)
{
- int ret;
-
- ret = xt_register_match(&multiport_match);
- if (ret)
- goto out;
-
- ret = xt_register_match(&multiport_match_v1);
- if (ret)
- goto out_unreg_multi_v0;
-
- ret = xt_register_match(&multiport6_match);
- if (ret)
- goto out_unreg_multi_v1;
-
- ret = xt_register_match(&multiport6_match_v1);
- if (ret)
- goto out_unreg_multi6_v0;
-
- return ret;
-
-out_unreg_multi6_v0:
- xt_unregister_match(&multiport6_match);
-out_unreg_multi_v1:
- xt_unregister_match(&multiport_match_v1);
-out_unreg_multi_v0:
- xt_unregister_match(&multiport_match);
-out:
- return ret;
+ return xt_register_matches(xt_multiport_match,
+ ARRAY_SIZE(xt_multiport_match));
}
static void __exit xt_multiport_fini(void)
{
- xt_unregister_match(&multiport_match);
- xt_unregister_match(&multiport_match_v1);
- xt_unregister_match(&multiport6_match);
- xt_unregister_match(&multiport6_match_v1);
+ xt_unregister_matches(xt_multiport_match,
+ ARRAY_SIZE(xt_multiport_match));
}
module_init(xt_multiport_init);
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 5fe4c9df17f5..fd8f954cded5 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_physdev.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge.h>
@@ -105,7 +106,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_physdev_info *info = matchinfo;
@@ -113,46 +113,52 @@ checkentry(const char *tablename,
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return 0;
+ if (brnf_deferred_hooks == 0 &&
+ info->bitmask & XT_PHYSDEV_OP_OUT &&
+ (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
+ info->invert & XT_PHYSDEV_OP_BRIDGED) &&
+ hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING))) {
+ printk(KERN_WARNING "physdev match: using --physdev-out in the "
+ "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
+ "traffic is deprecated and breaks other things, it will "
+ "be removed in January 2007. See Documentation/"
+ "feature-removal-schedule.txt for details. This doesn't "
+ "affect you in case you're using it for purely bridged "
+ "traffic.\n");
+ brnf_deferred_hooks = 1;
+ }
return 1;
}
-static struct xt_match physdev_match = {
- .name = "physdev",
- .match = match,
- .matchsize = sizeof(struct xt_physdev_info),
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match physdev6_match = {
- .name = "physdev",
- .match = match,
- .matchsize = sizeof(struct xt_physdev_info),
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_physdev_match[] = {
+ {
+ .name = "physdev",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_physdev_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "physdev",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_physdev_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_physdev_init(void)
{
- int ret;
-
- ret = xt_register_match(&physdev_match);
- if (ret < 0)
- return ret;
-
- ret = xt_register_match(&physdev6_match);
- if (ret < 0)
- xt_unregister_match(&physdev_match);
-
- return ret;
+ return xt_register_matches(xt_physdev_match,
+ ARRAY_SIZE(xt_physdev_match));
}
static void __exit xt_physdev_fini(void)
{
- xt_unregister_match(&physdev_match);
- xt_unregister_match(&physdev6_match);
+ xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
}
module_init(xt_physdev_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 3ac703b5cb8f..16e7b0804287 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -9,6 +9,8 @@
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
#include <linux/netfilter/xt_pkttype.h>
#include <linux/netfilter/x_tables.h>
@@ -28,45 +30,45 @@ static int match(const struct sk_buff *skb,
unsigned int protoff,
int *hotdrop)
{
+ u_int8_t type;
const struct xt_pkttype_info *info = matchinfo;
- return (skb->pkt_type == info->pkttype) ^ info->invert;
-}
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ type = (MULTICAST(skb->nh.iph->daddr)
+ ? PACKET_MULTICAST
+ : PACKET_BROADCAST);
+ else
+ type = skb->pkt_type;
-static struct xt_match pkttype_match = {
- .name = "pkttype",
- .match = match,
- .matchsize = sizeof(struct xt_pkttype_info),
- .family = AF_INET,
- .me = THIS_MODULE,
-};
+ return (type == info->pkttype) ^ info->invert;
+}
-static struct xt_match pkttype6_match = {
- .name = "pkttype",
- .match = match,
- .matchsize = sizeof(struct xt_pkttype_info),
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_pkttype_match[] = {
+ {
+ .name = "pkttype",
+ .family = AF_INET,
+ .match = match,
+ .matchsize = sizeof(struct xt_pkttype_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "pkttype",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct xt_pkttype_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_pkttype_init(void)
{
- int ret;
- ret = xt_register_match(&pkttype_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&pkttype6_match);
- if (ret)
- xt_unregister_match(&pkttype_match);
-
- return ret;
+ return xt_register_matches(xt_pkttype_match,
+ ARRAY_SIZE(xt_pkttype_match));
}
static void __exit xt_pkttype_fini(void)
{
- xt_unregister_match(&pkttype_match);
- xt_unregister_match(&pkttype6_match);
+ xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
}
module_init(xt_pkttype_init);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index a3aa62fbda6f..46bde2b1e1e0 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -8,7 +8,6 @@
*/
#include <linux/kernel.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
@@ -136,8 +135,7 @@ static int match(const struct sk_buff *skb,
static int checkentry(const char *tablename, const void *ip_void,
const struct xt_match *match,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
+ void *matchinfo, unsigned int hook_mask)
{
struct xt_policy_info *info = matchinfo;
@@ -166,43 +164,34 @@ static int checkentry(const char *tablename, const void *ip_void,
return 1;
}
-static struct xt_match policy_match = {
- .name = "policy",
- .family = AF_INET,
- .match = match,
- .matchsize = sizeof(struct xt_policy_info),
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match policy6_match = {
- .name = "policy",
- .family = AF_INET6,
- .match = match,
- .matchsize = sizeof(struct xt_policy_info),
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_policy_match[] = {
+ {
+ .name = "policy",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_policy_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "policy",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_policy_info),
+ .me = THIS_MODULE,
+ },
};
static int __init init(void)
{
- int ret;
-
- ret = xt_register_match(&policy_match);
- if (ret)
- return ret;
- ret = xt_register_match(&policy6_match);
- if (ret)
- xt_unregister_match(&policy_match);
- return ret;
+ return xt_register_matches(xt_policy_match,
+ ARRAY_SIZE(xt_policy_match));
}
static void __exit fini(void)
{
- xt_unregister_match(&policy6_match);
- xt_unregister_match(&policy_match);
+ xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
}
module_init(init);
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 4cdba7469dc4..b75fa2c70e66 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_ALIAS("ipt_quota");
+MODULE_ALIAS("ip6t_quota");
static DEFINE_SPINLOCK(quota_lock);
@@ -39,7 +41,7 @@ match(const struct sk_buff *skb,
static int
checkentry(const char *tablename, const void *entry,
const struct xt_match *match, void *matchinfo,
- unsigned int matchsize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
@@ -50,46 +52,33 @@ checkentry(const char *tablename, const void *entry,
return 1;
}
-static struct xt_match quota_match = {
- .name = "quota",
- .family = AF_INET,
- .match = match,
- .matchsize = sizeof(struct xt_quota_info),
- .checkentry = checkentry,
- .me = THIS_MODULE
-};
-
-static struct xt_match quota_match6 = {
- .name = "quota",
- .family = AF_INET6,
- .match = match,
- .matchsize = sizeof(struct xt_quota_info),
- .checkentry = checkentry,
- .me = THIS_MODULE
+static struct xt_match xt_quota_match[] = {
+ {
+ .name = "quota",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_quota_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "quota",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_quota_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_quota_init(void)
{
- int ret;
-
- ret = xt_register_match(&quota_match);
- if (ret)
- goto err1;
- ret = xt_register_match(&quota_match6);
- if (ret)
- goto err2;
- return ret;
-
-err2:
- xt_unregister_match(&quota_match);
-err1:
- return ret;
+ return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
}
static void __exit xt_quota_fini(void)
{
- xt_unregister_match(&quota_match6);
- xt_unregister_match(&quota_match);
+ xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
}
module_init(xt_quota_init);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 9316c753692f..7956acaaa24b 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -151,7 +151,7 @@ match(const struct sk_buff *skb,
&& SCCHECK(((ntohs(sh->dest) >= info->dpts[0])
&& (ntohs(sh->dest) <= info->dpts[1])),
XT_SCTP_DEST_PORTS, info->flags, info->invflags)
- && SCCHECK(match_packet(skb, protoff,
+ && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t),
info->chunkmap, info->chunk_match_type,
info->flag_info, info->flag_count,
hotdrop),
@@ -163,7 +163,6 @@ checkentry(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_sctp_info *info = matchinfo;
@@ -178,44 +177,35 @@ checkentry(const char *tablename,
| SCTP_CHUNK_MATCH_ONLY)));
}
-static struct xt_match sctp_match = {
- .name = "sctp",
- .match = match,
- .matchsize = sizeof(struct xt_sctp_info),
- .proto = IPPROTO_SCTP,
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE
-};
-
-static struct xt_match sctp6_match = {
- .name = "sctp",
- .match = match,
- .matchsize = sizeof(struct xt_sctp_info),
- .proto = IPPROTO_SCTP,
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE
+static struct xt_match xt_sctp_match[] = {
+ {
+ .name = "sctp",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "sctp",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
};
static int __init xt_sctp_init(void)
{
- int ret;
- ret = xt_register_match(&sctp_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&sctp6_match);
- if (ret)
- xt_unregister_match(&sctp_match);
-
- return ret;
+ return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
}
static void __exit xt_sctp_fini(void)
{
- xt_unregister_match(&sctp6_match);
- xt_unregister_match(&sctp_match);
+ xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
}
module_init(xt_sctp_init);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f9e304dc4504..d9010b16a1f9 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -48,7 +48,6 @@ static int check(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -62,54 +61,43 @@ static int check(const char *tablename,
}
static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
#endif
}
-static struct xt_match state_match = {
- .name = "state",
- .match = match,
- .checkentry = check,
- .destroy = destroy,
- .matchsize = sizeof(struct xt_state_info),
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match state6_match = {
- .name = "state",
- .match = match,
- .checkentry = check,
- .destroy = destroy,
- .matchsize = sizeof(struct xt_state_info),
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_state_match[] = {
+ {
+ .name = "state",
+ .family = AF_INET,
+ .checkentry = check,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_state_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "state",
+ .family = AF_INET6,
+ .checkentry = check,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_state_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_state_init(void)
{
- int ret;
-
need_conntrack();
-
- ret = xt_register_match(&state_match);
- if (ret < 0)
- return ret;
-
- ret = xt_register_match(&state6_match);
- if (ret < 0)
- xt_unregister_match(&state_match);
-
- return ret;
+ return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
}
static void __exit xt_state_fini(void)
{
- xt_unregister_match(&state_match);
- xt_unregister_match(&state6_match);
+ xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
}
module_init(xt_state_init);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index de1037f58596..091a9f89f5d5 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -55,7 +55,7 @@ match(const struct sk_buff *skb,
static int
checkentry(const char *tablename, const void *entry,
const struct xt_match *match, void *matchinfo,
- unsigned int matchsize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
@@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry,
return 1;
}
-static struct xt_match statistic_match = {
- .name = "statistic",
- .match = match,
- .matchsize = sizeof(struct xt_statistic_info),
- .checkentry = checkentry,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match statistic_match6 = {
- .name = "statistic",
- .match = match,
- .matchsize = sizeof(struct xt_statistic_info),
- .checkentry = checkentry,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_statistic_match[] = {
+ {
+ .name = "statistic",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_statistic_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "statistic",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .matchsize = sizeof(struct xt_statistic_info),
+ .me = THIS_MODULE,
+ },
};
static int __init xt_statistic_init(void)
{
- int ret;
-
- ret = xt_register_match(&statistic_match);
- if (ret)
- goto err1;
-
- ret = xt_register_match(&statistic_match6);
- if (ret)
- goto err2;
- return ret;
-err2:
- xt_unregister_match(&statistic_match);
-err1:
- return ret;
+ return xt_register_matches(xt_statistic_match,
+ ARRAY_SIZE(xt_statistic_match));
}
static void __exit xt_statistic_fini(void)
{
- xt_unregister_match(&statistic_match6);
- xt_unregister_match(&statistic_match);
+ xt_unregister_matches(xt_statistic_match,
+ ARRAY_SIZE(xt_statistic_match));
}
module_init(xt_statistic_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 0ebb6ac2c8c7..4453252400aa 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -37,7 +37,7 @@ static int match(const struct sk_buff *skb,
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
conf->to_offset, conf->config, &state)
- != UINT_MAX) && !conf->invert;
+ != UINT_MAX) ^ conf->invert;
}
#define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
@@ -46,7 +46,6 @@ static int checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct xt_string_info *conf = matchinfo;
@@ -55,7 +54,10 @@ static int checkentry(const char *tablename,
/* Damn, can't handle this case properly with iptables... */
if (conf->from_offset > conf->to_offset)
return 0;
-
+ if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
+ return 0;
+ if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
+ return 0;
ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
GFP_KERNEL, TS_AUTOLOAD);
if (IS_ERR(ts_conf))
@@ -66,49 +68,40 @@ static int checkentry(const char *tablename,
return 1;
}
-static void destroy(const struct xt_match *match, void *matchinfo,
- unsigned int matchsize)
+static void destroy(const struct xt_match *match, void *matchinfo)
{
textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
}
-static struct xt_match string_match = {
- .name = "string",
- .match = match,
- .matchsize = sizeof(struct xt_string_info),
- .checkentry = checkentry,
- .destroy = destroy,
- .family = AF_INET,
- .me = THIS_MODULE
-};
-static struct xt_match string6_match = {
- .name = "string",
- .match = match,
- .matchsize = sizeof(struct xt_string_info),
- .checkentry = checkentry,
- .destroy = destroy,
- .family = AF_INET6,
- .me = THIS_MODULE
+static struct xt_match xt_string_match[] = {
+ {
+ .name = "string",
+ .family = AF_INET,
+ .checkentry = checkentry,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_string_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "string",
+ .family = AF_INET6,
+ .checkentry = checkentry,
+ .match = match,
+ .destroy = destroy,
+ .matchsize = sizeof(struct xt_string_info),
+ .me = THIS_MODULE
+ },
};
static int __init xt_string_init(void)
{
- int ret;
-
- ret = xt_register_match(&string_match);
- if (ret)
- return ret;
- ret = xt_register_match(&string6_match);
- if (ret)
- xt_unregister_match(&string_match);
-
- return ret;
+ return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
}
static void __exit xt_string_fini(void)
{
- xt_unregister_match(&string_match);
- xt_unregister_match(&string6_match);
+ xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
}
module_init(xt_string_init);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index cf7d335cadcd..a3682fe2f192 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -18,21 +18,22 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
-#define TH_SYN 0x02
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables TCP MSS match module");
MODULE_ALIAS("ipt_tcpmss");
-/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
-static inline int
-mssoption_match(u_int16_t min, u_int16_t max,
- const struct sk_buff *skb,
- unsigned int protoff,
- int invert,
- int *hotdrop)
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct xt_match *match,
+ const void *matchinfo,
+ int offset,
+ unsigned int protoff,
+ int *hotdrop)
{
+ const struct xt_tcpmss_match_info *info = matchinfo;
struct tcphdr _tcph, *th;
/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
u8 _opt[15 * 4 - sizeof(_tcph)], *op;
@@ -64,72 +65,50 @@ mssoption_match(u_int16_t min, u_int16_t max,
mssval = (op[i+2] << 8) | op[i+3];
- return (mssval >= min && mssval <= max) ^ invert;
+ return (mssval >= info->mss_min &&
+ mssval <= info->mss_max) ^ info->invert;
}
- if (op[i] < 2) i++;
- else i += op[i+1]?:1;
+ if (op[i] < 2)
+ i++;
+ else
+ i += op[i+1] ? : 1;
}
out:
- return invert;
+ return info->invert;
- dropit:
+dropit:
*hotdrop = 1;
return 0;
}
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct xt_tcpmss_match_info *info = matchinfo;
-
- return mssoption_match(info->mss_min, info->mss_max, skb, protoff,
- info->invert, hotdrop);
-}
-
-static struct xt_match tcpmss_match = {
- .name = "tcpmss",
- .match = match,
- .matchsize = sizeof(struct xt_tcpmss_match_info),
- .proto = IPPROTO_TCP,
- .family = AF_INET,
- .me = THIS_MODULE,
-};
-
-static struct xt_match tcpmss6_match = {
- .name = "tcpmss",
- .match = match,
- .matchsize = sizeof(struct xt_tcpmss_match_info),
- .proto = IPPROTO_TCP,
- .family = AF_INET6,
- .me = THIS_MODULE,
+static struct xt_match xt_tcpmss_match[] = {
+ {
+ .name = "tcpmss",
+ .family = AF_INET,
+ .match = match,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcpmss",
+ .family = AF_INET6,
+ .match = match,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
};
-
static int __init xt_tcpmss_init(void)
{
- int ret;
- ret = xt_register_match(&tcpmss_match);
- if (ret)
- return ret;
-
- ret = xt_register_match(&tcpmss6_match);
- if (ret)
- xt_unregister_match(&tcpmss_match);
-
- return ret;
+ return xt_register_matches(xt_tcpmss_match,
+ ARRAY_SIZE(xt_tcpmss_match));
}
static void __exit xt_tcpmss_fini(void)
{
- xt_unregister_match(&tcpmss6_match);
- xt_unregister_match(&tcpmss_match);
+ xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
}
module_init(xt_tcpmss_init);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 1b61dac9c873..e76a68e0bc66 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_tcp *tcpinfo = matchinfo;
@@ -190,7 +189,6 @@ udp_checkentry(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct xt_tcp *udpinfo = matchinfo;
@@ -199,81 +197,54 @@ udp_checkentry(const char *tablename,
return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
}
-static struct xt_match tcp_matchstruct = {
- .name = "tcp",
- .match = tcp_match,
- .matchsize = sizeof(struct xt_tcp),
- .proto = IPPROTO_TCP,
- .family = AF_INET,
- .checkentry = tcp_checkentry,
- .me = THIS_MODULE,
-};
-
-static struct xt_match tcp6_matchstruct = {
- .name = "tcp",
- .match = tcp_match,
- .matchsize = sizeof(struct xt_tcp),
- .proto = IPPROTO_TCP,
- .family = AF_INET6,
- .checkentry = tcp_checkentry,
- .me = THIS_MODULE,
-};
-
-static struct xt_match udp_matchstruct = {
- .name = "udp",
- .match = udp_match,
- .matchsize = sizeof(struct xt_udp),
- .proto = IPPROTO_UDP,
- .family = AF_INET,
- .checkentry = udp_checkentry,
- .me = THIS_MODULE,
-};
-static struct xt_match udp6_matchstruct = {
- .name = "udp",
- .match = udp_match,
- .matchsize = sizeof(struct xt_udp),
- .proto = IPPROTO_UDP,
- .family = AF_INET6,
- .checkentry = udp_checkentry,
- .me = THIS_MODULE,
+static struct xt_match xt_tcpudp_match[] = {
+ {
+ .name = "tcp",
+ .family = AF_INET,
+ .checkentry = tcp_checkentry,
+ .match = tcp_match,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcp",
+ .family = AF_INET6,
+ .checkentry = tcp_checkentry,
+ .match = tcp_match,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = AF_INET,
+ .checkentry = udp_checkentry,
+ .match = udp_match,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = AF_INET6,
+ .checkentry = udp_checkentry,
+ .match = udp_match,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
};
static int __init xt_tcpudp_init(void)
{
- int ret;
- ret = xt_register_match(&tcp_matchstruct);
- if (ret)
- return ret;
-
- ret = xt_register_match(&tcp6_matchstruct);
- if (ret)
- goto out_unreg_tcp;
-
- ret = xt_register_match(&udp_matchstruct);
- if (ret)
- goto out_unreg_tcp6;
-
- ret = xt_register_match(&udp6_matchstruct);
- if (ret)
- goto out_unreg_udp;
-
- return ret;
-
-out_unreg_udp:
- xt_unregister_match(&tcp_matchstruct);
-out_unreg_tcp6:
- xt_unregister_match(&tcp6_matchstruct);
-out_unreg_tcp:
- xt_unregister_match(&tcp_matchstruct);
- return ret;
+ return xt_register_matches(xt_tcpudp_match,
+ ARRAY_SIZE(xt_tcpudp_match));
}
static void __exit xt_tcpudp_fini(void)
{
- xt_unregister_match(&udp6_matchstruct);
- xt_unregister_match(&udp_matchstruct);
- xt_unregister_match(&tcp6_matchstruct);
- xt_unregister_match(&tcp_matchstruct);
+ xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
}
module_init(xt_tcpudp_init);
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
new file mode 100644
index 000000000000..9f7121ae13e9
--- /dev/null
+++ b/net/netlabel/Kconfig
@@ -0,0 +1,17 @@
+#
+# NetLabel configuration
+#
+
+config NETLABEL
+ bool "NetLabel subsystem support"
+ depends on NET && SECURITY
+ default n
+ ---help---
+ NetLabel provides support for explicit network packet labeling
+ protocols such as CIPSO and RIPSO. For more information see
+ Documentation/netlabel as well as the NetLabel SourceForge project
+ for configuration tools and additional documentation.
+
+ * http://netlabel.sf.net
+
+ If you are unsure, say N.
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
new file mode 100644
index 000000000000..8af18c0a47d9
--- /dev/null
+++ b/net/netlabel/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the NetLabel subsystem.
+#
+# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
+#
+
+# base objects
+obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+
+# management objects
+obj-y += netlabel_mgmt.o
+
+# protocol modules
+obj-y += netlabel_unlabeled.o
+obj-y += netlabel_cipso_v4.o
+
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
new file mode 100644
index 000000000000..a6ce1d6d5c59
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -0,0 +1,773 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/audit.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_user.h"
+#include "netlabel_cipso_v4.h"
+
+/* Argument struct for cipso_v4_doi_walk() */
+struct netlbl_cipsov4_doiwalk_arg {
+ struct netlink_callback *nl_cb;
+ struct sk_buff *skb;
+ u32 seq;
+};
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_cipsov4_gnl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_CIPSOV4_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_CIPSOV4_A_MAX,
+};
+
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
+ [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 },
+ [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED },
+};
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
+{
+ struct cipso_v4_doi *ptr;
+
+ ptr = container_of(entry, struct cipso_v4_doi, rcu);
+ switch (ptr->type) {
+ case CIPSO_V4_MAP_STD:
+ kfree(ptr->map.std->lvl.cipso);
+ kfree(ptr->map.std->lvl.local);
+ kfree(ptr->map.std->cat.cipso);
+ kfree(ptr->map.std->cat.local);
+ break;
+ }
+ kfree(ptr);
+}
+
+/**
+ * netlbl_cipsov4_add_common - Parse the common sections of a ADD message
+ * @info: the Generic NETLINK info block
+ * @doi_def: the CIPSO V4 DOI definition
+ *
+ * Description:
+ * Parse the common sections of a ADD message and fill in the related values
+ * in @doi_def. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_add_common(struct genl_info *info,
+ struct cipso_v4_doi *doi_def)
+{
+ struct nlattr *nla;
+ int nla_rem;
+ u32 iter = 0;
+
+ doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ return -EINVAL;
+
+ nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
+ if (nla->nla_type == NLBL_CIPSOV4_A_TAG) {
+ if (iter > CIPSO_V4_TAG_MAXCNT)
+ return -EINVAL;
+ doi_def->tags[iter++] = nla_get_u8(nla);
+ }
+ if (iter < CIPSO_V4_TAG_MAXCNT)
+ doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+ return 0;
+}
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine. Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_std(struct genl_info *info)
+{
+ int ret_val = -EINVAL;
+ struct cipso_v4_doi *doi_def = NULL;
+ struct nlattr *nla_a;
+ struct nlattr *nla_b;
+ int nla_a_rem;
+ int nla_b_rem;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] ||
+ !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST])
+ return -EINVAL;
+
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ return -EINVAL;
+
+ doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+ if (doi_def == NULL)
+ return -ENOMEM;
+ doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
+ if (doi_def->map.std == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ doi_def->type = CIPSO_V4_MAP_STD;
+
+ ret_val = netlbl_cipsov4_add_common(info, doi_def);
+ if (ret_val != 0)
+ goto add_std_failure;
+
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
+ nla_for_each_nested(nla_b, nla_a, nla_b_rem)
+ switch (nla_b->nla_type) {
+ case NLBL_CIPSOV4_A_MLSLVLLOC:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->lvl.local_size)
+ doi_def->map.std->lvl.local_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ case NLBL_CIPSOV4_A_MLSLVLREM:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->lvl.cipso_size)
+ doi_def->map.std->lvl.cipso_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ }
+ }
+ if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS ||
+ doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
+ goto add_std_failure;
+ doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
+ sizeof(u32),
+ GFP_KERNEL);
+ if (doi_def->map.std->lvl.local == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
+ sizeof(u32),
+ GFP_KERNEL);
+ if (doi_def->map.std->lvl.cipso == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
+ struct nlattr *lvl_loc;
+ struct nlattr *lvl_rem;
+
+ if (nla_validate_nested(nla_a,
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+
+ lvl_loc = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSLVLLOC);
+ lvl_rem = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSLVLREM);
+ if (lvl_loc == NULL || lvl_rem == NULL)
+ goto add_std_failure;
+ doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] =
+ nla_get_u32(lvl_rem);
+ doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] =
+ nla_get_u32(lvl_loc);
+ }
+
+ if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
+ if (nla_validate_nested(nla_a,
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+ nla_for_each_nested(nla_b, nla_a, nla_b_rem)
+ switch (nla_b->nla_type) {
+ case NLBL_CIPSOV4_A_MLSCATLOC:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->cat.local_size)
+ doi_def->map.std->cat.local_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ case NLBL_CIPSOV4_A_MLSCATREM:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->cat.cipso_size)
+ doi_def->map.std->cat.cipso_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ }
+ }
+ if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS ||
+ doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
+ goto add_std_failure;
+ doi_def->map.std->cat.local = kcalloc(
+ doi_def->map.std->cat.local_size,
+ sizeof(u32),
+ GFP_KERNEL);
+ if (doi_def->map.std->cat.local == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ doi_def->map.std->cat.cipso = kcalloc(
+ doi_def->map.std->cat.cipso_size,
+ sizeof(u32),
+ GFP_KERNEL);
+ if (doi_def->map.std->cat.cipso == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
+ struct nlattr *cat_loc;
+ struct nlattr *cat_rem;
+
+ cat_loc = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSCATLOC);
+ cat_rem = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSCATREM);
+ if (cat_loc == NULL || cat_rem == NULL)
+ goto add_std_failure;
+ doi_def->map.std->cat.local[
+ nla_get_u32(cat_loc)] =
+ nla_get_u32(cat_rem);
+ doi_def->map.std->cat.cipso[
+ nla_get_u32(cat_rem)] =
+ nla_get_u32(cat_loc);
+ }
+ }
+
+ ret_val = cipso_v4_doi_add(doi_def);
+ if (ret_val != 0)
+ goto add_std_failure;
+ return 0;
+
+add_std_failure:
+ if (doi_def)
+ netlbl_cipsov4_doi_free(&doi_def->rcu);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine. Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_pass(struct genl_info *info)
+{
+ int ret_val;
+ struct cipso_v4_doi *doi_def = NULL;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
+ return -EINVAL;
+
+ doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+ if (doi_def == NULL)
+ return -ENOMEM;
+ doi_def->type = CIPSO_V4_MAP_PASS;
+
+ ret_val = netlbl_cipsov4_add_common(info, doi_def);
+ if (ret_val != 0)
+ goto add_pass_failure;
+
+ ret_val = cipso_v4_doi_add(doi_def);
+ if (ret_val != 0)
+ goto add_pass_failure;
+ return 0;
+
+add_pass_failure:
+ netlbl_cipsov4_doi_free(&doi_def->rcu);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new DOI definition based on the given ADD message and add it to the
+ * CIPSO V4 engine. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
+
+{
+ int ret_val = -EINVAL;
+ u32 type;
+ u32 doi;
+ const char *type_str = "(unknown)";
+ struct audit_buffer *audit_buf;
+ struct netlbl_audit audit_info;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
+ !info->attrs[NLBL_CIPSOV4_A_MTYPE])
+ return -EINVAL;
+
+ doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
+ switch (type) {
+ case CIPSO_V4_MAP_STD:
+ type_str = "std";
+ ret_val = netlbl_cipsov4_add_std(info);
+ break;
+ case CIPSO_V4_MAP_PASS:
+ type_str = "pass";
+ ret_val = netlbl_cipsov4_add_pass(info);
+ break;
+ }
+
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+ &audit_info);
+ audit_log_format(audit_buf,
+ " cipso_doi=%u cipso_type=%s res=%u",
+ doi,
+ type_str,
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond accordingly. While the
+ * response message generated by the kernel is straightforward, determining
+ * before hand the size of the buffer to allocate is not (we have to generate
+ * the message to know the size). In order to keep this function sane what we
+ * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in
+ * that size, if we fail then we restart with a larger buffer and try again.
+ * We continue in this manner until we hit a limit of failed attempts then we
+ * give up and just send an error message. Returns zero on success and
+ * negative values on error.
+ *
+ */
+static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val;
+ struct sk_buff *ans_skb = NULL;
+ u32 nlsze_mult = 1;
+ void *data;
+ u32 doi;
+ struct nlattr *nla_a;
+ struct nlattr *nla_b;
+ struct cipso_v4_doi *doi_def;
+ u32 iter;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI]) {
+ ret_val = -EINVAL;
+ goto list_failure;
+ }
+
+list_start:
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL);
+ if (ans_skb == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure;
+ }
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_cipsov4_gnl_family.id,
+ 0,
+ NLBL_CIPSOV4_C_LIST);
+ if (data == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure;
+ }
+
+ doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL) {
+ ret_val = -EINVAL;
+ goto list_failure;
+ }
+
+ ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
+ if (ret_val != 0)
+ goto list_failure_lock;
+
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure_lock;
+ }
+ for (iter = 0;
+ iter < CIPSO_V4_TAG_MAXCNT &&
+ doi_def->tags[iter] != CIPSO_V4_TAG_INVALID;
+ iter++) {
+ ret_val = nla_put_u8(ans_skb,
+ NLBL_CIPSOV4_A_TAG,
+ doi_def->tags[iter]);
+ if (ret_val != 0)
+ goto list_failure_lock;
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_STD:
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure_lock;
+ }
+ for (iter = 0;
+ iter < doi_def->map.std->lvl.local_size;
+ iter++) {
+ if (doi_def->map.std->lvl.local[iter] ==
+ CIPSO_V4_INV_LVL)
+ continue;
+
+ nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL);
+ if (nla_b == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSLVLLOC,
+ iter);
+ if (ret_val != 0)
+ goto list_retry;
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSLVLREM,
+ doi_def->map.std->lvl.local[iter]);
+ if (ret_val != 0)
+ goto list_retry;
+ nla_nest_end(ans_skb, nla_b);
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ for (iter = 0;
+ iter < doi_def->map.std->cat.local_size;
+ iter++) {
+ if (doi_def->map.std->cat.local[iter] ==
+ CIPSO_V4_INV_CAT)
+ continue;
+
+ nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT);
+ if (nla_b == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSCATLOC,
+ iter);
+ if (ret_val != 0)
+ goto list_retry;
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSCATREM,
+ doi_def->map.std->cat.local[iter]);
+ if (ret_val != 0)
+ goto list_retry;
+ nla_nest_end(ans_skb, nla_b);
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ break;
+ }
+ rcu_read_unlock();
+
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ if (ret_val != 0)
+ goto list_failure;
+
+ return 0;
+
+list_retry:
+ /* XXX - this limit is a guesstimate */
+ if (nlsze_mult < 4) {
+ rcu_read_unlock();
+ kfree_skb(ans_skb);
+ nlsze_mult++;
+ goto list_start;
+ }
+list_failure_lock:
+ rcu_read_unlock();
+list_failure:
+ kfree_skb(ans_skb);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL
+ * @doi_def: the CIPSOv4 DOI definition
+ * @arg: the netlbl_cipsov4_doiwalk_arg structure
+ *
+ * Description:
+ * This function is designed to be used as a callback to the
+ * cipso_v4_doi_walk() function for use in generating a response for a LISTALL
+ * message. Returns the size of the message on success, negative values on
+ * failure.
+ *
+ */
+static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
+{
+ int ret_val = -ENOMEM;
+ struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
+ void *data;
+
+ data = netlbl_netlink_hdr_put(cb_arg->skb,
+ NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq,
+ netlbl_cipsov4_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_CIPSOV4_C_LISTALL);
+ if (data == NULL)
+ goto listall_cb_failure;
+
+ ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ ret_val = nla_put_u32(cb_arg->skb,
+ NLBL_CIPSOV4_A_MTYPE,
+ doi_def->type);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+
+ return genlmsg_end(cb_arg->skb, data);
+
+listall_cb_failure:
+ genlmsg_cancel(cb_arg->skb, data);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall - Handle a LISTALL message
+ * @skb: the NETLINK buffer
+ * @cb: the NETLINK callback
+ *
+ * Description:
+ * Process a user generated LISTALL message and respond accordingly. Returns
+ * zero on success and negative values on error.
+ *
+ */
+static int netlbl_cipsov4_listall(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct netlbl_cipsov4_doiwalk_arg cb_arg;
+ int doi_skip = cb->args[0];
+
+ cb_arg.nl_cb = cb;
+ cb_arg.skb = skb;
+ cb_arg.seq = cb->nlh->nlmsg_seq;
+
+ cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg);
+
+ cb->args[0] = doi_skip;
+ return skb->len;
+}
+
+/**
+ * netlbl_cipsov4_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and respond accordingly. Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -EINVAL;
+ u32 doi = 0;
+ struct audit_buffer *audit_buf;
+ struct netlbl_audit audit_info;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI])
+ return -EINVAL;
+
+ doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ ret_val = cipso_v4_doi_remove(doi,
+ &audit_info,
+ netlbl_cipsov4_doi_free);
+
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
+ &audit_info);
+ audit_log_format(audit_buf,
+ " cipso_doi=%u res=%u",
+ doi,
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+
+ return ret_val;
+}
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_cipsov4_genl_c_add = {
+ .cmd = NLBL_CIPSOV4_C_ADD,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_cipsov4_genl_policy,
+ .doit = netlbl_cipsov4_add,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_remove = {
+ .cmd = NLBL_CIPSOV4_C_REMOVE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_cipsov4_genl_policy,
+ .doit = netlbl_cipsov4_remove,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_list = {
+ .cmd = NLBL_CIPSOV4_C_LIST,
+ .flags = 0,
+ .policy = netlbl_cipsov4_genl_policy,
+ .doit = netlbl_cipsov4_list,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_listall = {
+ .cmd = NLBL_CIPSOV4_C_LISTALL,
+ .flags = 0,
+ .policy = netlbl_cipsov4_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_cipsov4_listall,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component
+ *
+ * Description:
+ * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK
+ * mechanism. Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_cipsov4_genl_init(void)
+{
+ int ret_val;
+
+ ret_val = genl_register_family(&netlbl_cipsov4_gnl_family);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+ &netlbl_cipsov4_genl_c_add);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+ &netlbl_cipsov4_genl_c_remove);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+ &netlbl_cipsov4_genl_c_list);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+ &netlbl_cipsov4_genl_c_listall);
+ if (ret_val != 0)
+ return ret_val;
+
+ return 0;
+}
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
new file mode 100644
index 000000000000..f03cf9b78286
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -0,0 +1,166 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_CIPSO_V4
+#define _NETLABEL_CIPSO_V4
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the CIPSO subsystem.
+ *
+ * o ADD:
+ * Sent by an application to add a new DOI mapping table.
+ *
+ * Required attributes:
+ *
+ * NLBL_CIPSOV4_A_DOI
+ * NLBL_CIPSOV4_A_MTYPE
+ * NLBL_CIPSOV4_A_TAGLST
+ *
+ * If using CIPSO_V4_MAP_STD the following attributes are required:
+ *
+ * NLBL_CIPSOV4_A_MLSLVLLST
+ * NLBL_CIPSOV4_A_MLSCATLST
+ *
+ * If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *
+ * o REMOVE:
+ * Sent by an application to remove a specific DOI mapping table from the
+ * CIPSO V4 system.
+ *
+ * Required attributes:
+ *
+ * NLBL_CIPSOV4_A_DOI
+ *
+ * o LIST:
+ * Sent by an application to list the details of a DOI definition. On
+ * success the kernel should send a response using the following format.
+ *
+ * Required attributes:
+ *
+ * NLBL_CIPSOV4_A_DOI
+ *
+ * The valid response message format depends on the type of the DOI mapping,
+ * the defined formats are shown below.
+ *
+ * Required attributes:
+ *
+ * NLBL_CIPSOV4_A_MTYPE
+ * NLBL_CIPSOV4_A_TAGLST
+ *
+ * If using CIPSO_V4_MAP_STD the following attributes are required:
+ *
+ * NLBL_CIPSOV4_A_MLSLVLLST
+ * NLBL_CIPSOV4_A_MLSCATLST
+ *
+ * If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *
+ * o LISTALL:
+ * This message is sent by an application to list the valid DOIs on the
+ * system. When sent by an application there is no payload and the
+ * NLM_F_DUMP flag should be set. The kernel should respond with a series of
+ * the following messages.
+ *
+ * Required attributes:
+ *
+ * NLBL_CIPSOV4_A_DOI
+ * NLBL_CIPSOV4_A_MTYPE
+ *
+ */
+
+/* NetLabel CIPSOv4 commands */
+enum {
+ NLBL_CIPSOV4_C_UNSPEC,
+ NLBL_CIPSOV4_C_ADD,
+ NLBL_CIPSOV4_C_REMOVE,
+ NLBL_CIPSOV4_C_LIST,
+ NLBL_CIPSOV4_C_LISTALL,
+ __NLBL_CIPSOV4_C_MAX,
+};
+#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
+
+/* NetLabel CIPSOv4 attributes */
+enum {
+ NLBL_CIPSOV4_A_UNSPEC,
+ NLBL_CIPSOV4_A_DOI,
+ /* (NLA_U32)
+ * the DOI value */
+ NLBL_CIPSOV4_A_MTYPE,
+ /* (NLA_U32)
+ * the mapping table type (defined in the cipso_ipv4.h header as
+ * CIPSO_V4_MAP_*) */
+ NLBL_CIPSOV4_A_TAG,
+ /* (NLA_U8)
+ * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST
+ * attribute */
+ NLBL_CIPSOV4_A_TAGLST,
+ /* (NLA_NESTED)
+ * the CIPSO tag list for the DOI, there must be at least one
+ * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher
+ * priorirty when sending packets */
+ NLBL_CIPSOV4_A_MLSLVLLOC,
+ /* (NLA_U32)
+ * the local MLS sensitivity level */
+ NLBL_CIPSOV4_A_MLSLVLREM,
+ /* (NLA_U32)
+ * the remote MLS sensitivity level */
+ NLBL_CIPSOV4_A_MLSLVL,
+ /* (NLA_NESTED)
+ * a MLS sensitivity level mapping, must contain only one attribute of
+ * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and
+ * NLBL_CIPSOV4_A_MLSLVLREM */
+ NLBL_CIPSOV4_A_MLSLVLLST,
+ /* (NLA_NESTED)
+ * the CIPSO level mappings, there must be at least one
+ * NLBL_CIPSOV4_A_MLSLVL attribute */
+ NLBL_CIPSOV4_A_MLSCATLOC,
+ /* (NLA_U32)
+ * the local MLS category */
+ NLBL_CIPSOV4_A_MLSCATREM,
+ /* (NLA_U32)
+ * the remote MLS category */
+ NLBL_CIPSOV4_A_MLSCAT,
+ /* (NLA_NESTED)
+ * a MLS category mapping, must contain only one attribute of each of
+ * the following types: NLBL_CIPSOV4_A_MLSCATLOC and
+ * NLBL_CIPSOV4_A_MLSCATREM */
+ NLBL_CIPSOV4_A_MLSCATLST,
+ /* (NLA_NESTED)
+ * the CIPSO category mappings, there must be at least one
+ * NLBL_CIPSOV4_A_MLSCAT attribute */
+ __NLBL_CIPSOV4_A_MAX,
+};
+#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_cipsov4_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
new file mode 100644
index 000000000000..af4371d3b459
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.c
@@ -0,0 +1,448 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain. The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/audit.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
+#include "netlabel_user.h"
+
+struct netlbl_domhsh_tbl {
+ struct list_head *tbl;
+ u32 size;
+};
+
+/* Domain hash table */
+/* XXX - updates should be so rare that having one spinlock for the entire
+ * hash table should be okay */
+static DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
+
+/* Default domain mapping */
+static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
+static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+
+/*
+ * Domain Hash Table Helper Functions
+ */
+
+/**
+ * netlbl_domhsh_free_entry - Frees a domain hash table entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a hash table entry can be released
+ * safely.
+ *
+ */
+static void netlbl_domhsh_free_entry(struct rcu_head *entry)
+{
+ struct netlbl_dom_map *ptr;
+
+ ptr = container_of(entry, struct netlbl_dom_map, rcu);
+ kfree(ptr->domain);
+ kfree(ptr);
+}
+
+/**
+ * netlbl_domhsh_hash - Hashing function for the domain hash table
+ * @domain: the domain name to hash
+ *
+ * Description:
+ * This is the hashing function for the domain hash table, it returns the
+ * correct bucket number for the domain. The caller is responsibile for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+static u32 netlbl_domhsh_hash(const char *key)
+{
+ u32 iter;
+ u32 val;
+ u32 len;
+
+ /* This is taken (with slight modification) from
+ * security/selinux/ss/symtab.c:symhash() */
+
+ for (iter = 0, val = 0, len = strlen(key); iter < len; iter++)
+ val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter];
+ return val & (rcu_dereference(netlbl_domhsh)->size - 1);
+}
+
+/**
+ * netlbl_domhsh_search - Search for a domain entry
+ * @domain: the domain
+ * @def: return default if no match is found
+ *
+ * Description:
+ * Searches the domain hash table and returns a pointer to the hash table
+ * entry if found, otherwise NULL is returned. If @def is non-zero and a
+ * match is not found in the domain hash table the default mapping is returned
+ * if it exists. The caller is responsibile for the rcu hash table locks
+ * (i.e. the caller much call rcu_read_[un]lock()).
+ *
+ */
+static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
+{
+ u32 bkt;
+ struct netlbl_dom_map *iter;
+
+ if (domain != NULL) {
+ bkt = netlbl_domhsh_hash(domain);
+ list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
+ if (iter->valid && strcmp(iter->domain, domain) == 0)
+ return iter;
+ }
+
+ if (def != 0) {
+ iter = rcu_dereference(netlbl_domhsh_def);
+ if (iter != NULL && iter->valid)
+ return iter;
+ }
+
+ return NULL;
+}
+
+/*
+ * Domain Hash Table Functions
+ */
+
+/**
+ * netlbl_domhsh_init - Init for the domain hash
+ * @size: the number of bits to use for the hash buckets
+ *
+ * Description:
+ * Initializes the domain hash table, should be called only by
+ * netlbl_user_init() during initialization. Returns zero on success, non-zero
+ * values on error.
+ *
+ */
+int netlbl_domhsh_init(u32 size)
+{
+ u32 iter;
+ struct netlbl_domhsh_tbl *hsh_tbl;
+
+ if (size == 0)
+ return -EINVAL;
+
+ hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
+ if (hsh_tbl == NULL)
+ return -ENOMEM;
+ hsh_tbl->size = 1 << size;
+ hsh_tbl->tbl = kcalloc(hsh_tbl->size,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (hsh_tbl->tbl == NULL) {
+ kfree(hsh_tbl);
+ return -ENOMEM;
+ }
+ for (iter = 0; iter < hsh_tbl->size; iter++)
+ INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
+
+ rcu_read_lock();
+ spin_lock(&netlbl_domhsh_lock);
+ rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
+ spin_unlock(&netlbl_domhsh_lock);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * netlbl_domhsh_add - Adds a entry to the domain hash table
+ * @entry: the entry to add
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Adds a new entry to the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO). Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info)
+{
+ int ret_val;
+ u32 bkt;
+ struct audit_buffer *audit_buf;
+ char *audit_domain;
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = 0;
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
+ entry->domain);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret_val != 0)
+ return ret_val;
+
+ entry->valid = 1;
+ INIT_RCU_HEAD(&entry->rcu);
+
+ ret_val = 0;
+ rcu_read_lock();
+ if (entry->domain != NULL) {
+ bkt = netlbl_domhsh_hash(entry->domain);
+ spin_lock(&netlbl_domhsh_lock);
+ if (netlbl_domhsh_search(entry->domain, 0) == NULL)
+ list_add_tail_rcu(&entry->list,
+ &netlbl_domhsh->tbl[bkt]);
+ else
+ ret_val = -EEXIST;
+ spin_unlock(&netlbl_domhsh_lock);
+ } else if (entry->domain == NULL) {
+ INIT_LIST_HEAD(&entry->list);
+ spin_lock(&netlbl_domhsh_def_lock);
+ if (rcu_dereference(netlbl_domhsh_def) == NULL)
+ rcu_assign_pointer(netlbl_domhsh_def, entry);
+ else
+ ret_val = -EEXIST;
+ spin_unlock(&netlbl_domhsh_def_lock);
+ } else
+ ret_val = -EINVAL;
+
+ if (entry->domain != NULL)
+ audit_domain = entry->domain;
+ else
+ audit_domain = "(default)";
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
+ audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain);
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ audit_log_format(audit_buf,
+ " nlbl_protocol=cipsov4 cipso_doi=%u",
+ entry->type_def.cipsov4->doi);
+ break;
+ }
+ audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+
+ rcu_read_unlock();
+
+ if (ret_val != 0) {
+ switch (entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+ entry->domain) != 0)
+ BUG();
+ break;
+ }
+ }
+
+ return ret_val;
+}
+
+/**
+ * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
+ * @entry: the entry to add
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Adds a new default entry to the domain hash table and handles any updates
+ * to the lower level protocol handler (i.e. CIPSO). Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info)
+{
+ return netlbl_domhsh_add(entry, audit_info);
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO). Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+{
+ int ret_val = -ENOENT;
+ struct netlbl_dom_map *entry;
+ struct audit_buffer *audit_buf;
+ char *audit_domain;
+
+ rcu_read_lock();
+ if (domain != NULL)
+ entry = netlbl_domhsh_search(domain, 0);
+ else
+ entry = netlbl_domhsh_search(domain, 1);
+ if (entry == NULL)
+ goto remove_return;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+ entry->domain);
+ if (ret_val != 0)
+ goto remove_return;
+ break;
+ }
+ ret_val = 0;
+ if (entry != rcu_dereference(netlbl_domhsh_def)) {
+ spin_lock(&netlbl_domhsh_lock);
+ if (entry->valid) {
+ entry->valid = 0;
+ list_del_rcu(&entry->list);
+ } else
+ ret_val = -ENOENT;
+ spin_unlock(&netlbl_domhsh_lock);
+ } else {
+ spin_lock(&netlbl_domhsh_def_lock);
+ if (entry->valid) {
+ entry->valid = 0;
+ rcu_assign_pointer(netlbl_domhsh_def, NULL);
+ } else
+ ret_val = -ENOENT;
+ spin_unlock(&netlbl_domhsh_def_lock);
+ }
+
+ if (entry->domain != NULL)
+ audit_domain = entry->domain;
+ else
+ audit_domain = "(default)";
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
+ audit_log_format(audit_buf,
+ " nlbl_domain=%s res=%u",
+ audit_domain,
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+
+ if (ret_val == 0)
+ call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+
+remove_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove_default - Removes the default entry from the table
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes/resets the default entry for the domain hash table and handles any
+ * updates to the lower level protocol handler (i.e. CIPSO). Returns zero on
+ * success, non-zero on failure.
+ *
+ */
+int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info)
+{
+ return netlbl_domhsh_remove(NULL, audit_info);
+}
+
+/**
+ * netlbl_domhsh_getentry - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain,
+ * return a pointer to a copy of the entry or NULL. The caller is responsibile
+ * for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
+{
+ return netlbl_domhsh_search(domain, 1);
+}
+
+/**
+ * netlbl_domhsh_walk - Iterate through the domain mapping hash table
+ * @skip_bkt: the number of buckets to skip at the start
+ * @skip_chain: the number of entries to skip in the first iterated bucket
+ * @callback: callback for each entry
+ * @cb_arg: argument for the callback function
+ *
+ * Description:
+ * Interate over the domain mapping hash table, skipping the first @skip_bkt
+ * buckets and @skip_chain entries. For each entry in the table call
+ * @callback, if @callback returns a negative value stop 'walking' through the
+ * table and return. Updates the values in @skip_bkt and @skip_chain on
+ * return. Returns zero on succcess, negative values on failure.
+ *
+ */
+int netlbl_domhsh_walk(u32 *skip_bkt,
+ u32 *skip_chain,
+ int (*callback) (struct netlbl_dom_map *entry, void *arg),
+ void *cb_arg)
+{
+ int ret_val = -ENOENT;
+ u32 iter_bkt;
+ struct netlbl_dom_map *iter_entry;
+ u32 chain_cnt = 0;
+
+ rcu_read_lock();
+ for (iter_bkt = *skip_bkt;
+ iter_bkt < rcu_dereference(netlbl_domhsh)->size;
+ iter_bkt++, chain_cnt = 0) {
+ list_for_each_entry_rcu(iter_entry,
+ &netlbl_domhsh->tbl[iter_bkt],
+ list)
+ if (iter_entry->valid) {
+ if (chain_cnt++ < *skip_chain)
+ continue;
+ ret_val = callback(iter_entry, cb_arg);
+ if (ret_val < 0) {
+ chain_cnt--;
+ goto walk_return;
+ }
+ }
+ }
+
+walk_return:
+ rcu_read_unlock();
+ *skip_bkt = iter_bkt;
+ *skip_chain = chain_cnt;
+ return ret_val;
+}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
new file mode 100644
index 000000000000..3689956c3436
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.h
@@ -0,0 +1,71 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain. The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_DOMAINHASH_H
+#define _NETLABEL_DOMAINHASH_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+/* Domain hash table size */
+/* XXX - currently this number is an uneducated guess */
+#define NETLBL_DOMHSH_BITSIZE 7
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map {
+ char *domain;
+ u32 type;
+ union {
+ struct cipso_v4_doi *cipsov4;
+ } type_def;
+
+ u32 valid;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+/* init function */
+int netlbl_domhsh_init(u32 size);
+
+/* Manipulate the domain hash table */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+int netlbl_domhsh_walk(u32 *skip_bkt,
+ u32 *skip_chain,
+ int (*callback) (struct netlbl_dom_map *entry, void *arg),
+ void *cb_arg);
+
+#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
new file mode 100644
index 000000000000..54fb7de3c2b1
--- /dev/null
+++ b/net/netlabel/netlabel_kapi.c
@@ -0,0 +1,254 @@
+/*
+ * NetLabel Kernel API
+ *
+ * This file defines the kernel API for the NetLabel system. The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_user.h"
+
+/*
+ * LSM Functions
+ */
+
+/**
+ * netlbl_socket_setattr - Label a socket using the correct protocol
+ * @sock: the socket to label
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given socket using the security attributes
+ * specified in @secattr. This function requires exclusive access to
+ * @sock->sk, which means it either needs to be in the process of being
+ * created or locked via lock_sock(sock->sk). Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int netlbl_socket_setattr(const struct socket *sock,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOENT;
+ struct netlbl_dom_map *dom_entry;
+
+ rcu_read_lock();
+ dom_entry = netlbl_domhsh_getentry(secattr->domain);
+ if (dom_entry == NULL)
+ goto socket_setattr_return;
+ switch (dom_entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = cipso_v4_socket_setattr(sock,
+ dom_entry->type_def.cipsov4,
+ secattr);
+ break;
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = 0;
+ break;
+ default:
+ ret_val = -ENOENT;
+ }
+
+socket_setattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/**
+ * netlbl_sock_getattr - Determine the security attributes of a sock
+ * @sk: the sock
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given sock to see any NetLabel style labeling has been
+ * applied to the sock, if so it parses the socket label and returns the
+ * security attributes in @secattr. Returns zero on success, negative values
+ * on failure.
+ *
+ */
+int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_sock_getattr(sk, secattr);
+ if (ret_val == 0)
+ return 0;
+
+ return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_socket_getattr - Determine the security attributes of a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given socket to see any NetLabel style labeling has been
+ * applied to the socket, if so it parses the socket label and returns the
+ * security attributes in @secattr. Returns zero on success, negative values
+ * on failure.
+ *
+ */
+int netlbl_socket_getattr(const struct socket *sock,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_socket_getattr(sock, secattr);
+ if (ret_val == 0)
+ return 0;
+
+ return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_getattr - Determine the security attributes of a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given packet to see if a recognized form of packet labeling
+ * is present, if so it parses the packet label and returns the security
+ * attributes in @secattr. Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_skbuff_getattr(skb, secattr);
+ if (ret_val == 0)
+ return 0;
+
+ return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_err - Handle a LSM error on a sk_buff
+ * @skb: the packet
+ * @error: the error code
+ *
+ * Description:
+ * Deal with a LSM problem when handling the packet in @skb, typically this is
+ * a permission denied problem (-EACCES). The correct action is determined
+ * according to the packet's labeling protocol.
+ *
+ */
+void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+ if (CIPSO_V4_OPTEXIST(skb))
+ cipso_v4_error(skb, error, 0);
+}
+
+/**
+ * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
+ *
+ * Description:
+ * For all of the NetLabel protocols that support some form of label mapping
+ * cache, invalidate the cache. Returns zero on success, negative values on
+ * error.
+ *
+ */
+void netlbl_cache_invalidate(void)
+{
+ cipso_v4_cache_invalidate();
+}
+
+/**
+ * netlbl_cache_add - Add an entry to a NetLabel protocol cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add the LSM security attributes for the given packet to the underlying
+ * NetLabel protocol's label mapping cache. Returns zero on success, negative
+ * values on error.
+ *
+ */
+int netlbl_cache_add(const struct sk_buff *skb,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ if (secattr->cache.data == NULL)
+ return -ENOMSG;
+
+ if (CIPSO_V4_OPTEXIST(skb))
+ return cipso_v4_cache_add(skb, secattr);
+
+ return -ENOMSG;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * netlbl_init - Initialize NetLabel
+ *
+ * Description:
+ * Perform the required NetLabel initialization before first use.
+ *
+ */
+static int __init netlbl_init(void)
+{
+ int ret_val;
+
+ printk(KERN_INFO "NetLabel: Initializing\n");
+ printk(KERN_INFO "NetLabel: domain hash size = %u\n",
+ (1 << NETLBL_DOMHSH_BITSIZE));
+ printk(KERN_INFO "NetLabel: protocols ="
+ " UNLABELED"
+ " CIPSOv4"
+ "\n");
+
+ ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
+ if (ret_val != 0)
+ goto init_failure;
+
+ ret_val = netlbl_netlink_init();
+ if (ret_val != 0)
+ goto init_failure;
+
+ ret_val = netlbl_unlabel_defconf();
+ if (ret_val != 0)
+ goto init_failure;
+ printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n");
+
+ return 0;
+
+init_failure:
+ panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
+}
+
+subsys_initcall(netlbl_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
new file mode 100644
index 000000000000..53c9079ad2c3
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.c
@@ -0,0 +1,648 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_user.h"
+#include "netlabel_mgmt.h"
+
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+ struct netlink_callback *nl_cb;
+ struct sk_buff *skb;
+ u32 seq;
+};
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_mgmt_gnl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_MGMT_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_MGMT_A_MAX,
+};
+
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
+ [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING },
+ [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 },
+ [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 },
+ [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 },
+};
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table. See netlabel.h for a description of the message format.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -EINVAL;
+ struct netlbl_dom_map *entry = NULL;
+ size_t tmp_size;
+ u32 tmp_val;
+ struct netlbl_audit audit_info;
+
+ if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
+ !info->attrs[NLBL_MGMT_A_PROTOCOL])
+ goto add_failure;
+
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
+ entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+ if (entry->domain == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
+ nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = netlbl_domhsh_add(entry, &audit_info);
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (!info->attrs[NLBL_MGMT_A_CV4DOI])
+ goto add_failure;
+
+ tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
+ /* We should be holding a rcu_read_lock() here while we hold
+ * the result but since the entry will always be deleted when
+ * the CIPSO DOI is deleted we aren't going to keep the
+ * lock. */
+ rcu_read_lock();
+ entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+ if (entry->type_def.cipsov4 == NULL) {
+ rcu_read_unlock();
+ goto add_failure;
+ }
+ ret_val = netlbl_domhsh_add(entry, &audit_info);
+ rcu_read_unlock();
+ break;
+ default:
+ goto add_failure;
+ }
+ if (ret_val != 0)
+ goto add_failure;
+
+ return 0;
+
+add_failure:
+ if (entry)
+ kfree(entry->domain);
+ kfree(entry);
+ return ret_val;
+}
+
+/**
+ * netlbl_mgmt_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and remove the specified domain
+ * mappings. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
+{
+ char *domain;
+ struct netlbl_audit audit_info;
+
+ if (!info->attrs[NLBL_MGMT_A_DOMAIN])
+ return -EINVAL;
+
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
+ return netlbl_domhsh_remove(domain, &audit_info);
+}
+
+/**
+ * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL
+ * @entry: the domain mapping hash table entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is designed to be used as a callback to the
+ * netlbl_domhsh_walk() function for use in generating a response for a LISTALL
+ * message. Returns the size of the message on success, negative values on
+ * failure.
+ *
+ */
+static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
+{
+ int ret_val = -ENOMEM;
+ struct netlbl_domhsh_walk_arg *cb_arg = arg;
+ void *data;
+
+ data = netlbl_netlink_hdr_put(cb_arg->skb,
+ NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq,
+ netlbl_mgmt_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_MGMT_C_LISTALL);
+ if (data == NULL)
+ goto listall_cb_failure;
+
+ ret_val = nla_put_string(cb_arg->skb,
+ NLBL_MGMT_A_DOMAIN,
+ entry->domain);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(cb_arg->skb,
+ NLBL_MGMT_A_CV4DOI,
+ entry->type_def.cipsov4->doi);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ break;
+ }
+
+ cb_arg->seq++;
+ return genlmsg_end(cb_arg->skb, data);
+
+listall_cb_failure:
+ genlmsg_cancel(cb_arg->skb, data);
+ return ret_val;
+}
+
+/**
+ * netlbl_mgmt_listall - Handle a LISTALL message
+ * @skb: the NETLINK buffer
+ * @cb: the NETLINK callback
+ *
+ * Description:
+ * Process a user generated LISTALL message and dumps the domain hash table in
+ * a form suitable for use in a kernel generated LISTALL message. Returns zero
+ * on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listall(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct netlbl_domhsh_walk_arg cb_arg;
+ u32 skip_bkt = cb->args[0];
+ u32 skip_chain = cb->args[1];
+
+ cb_arg.nl_cb = cb;
+ cb_arg.skb = skb;
+ cb_arg.seq = cb->nlh->nlmsg_seq;
+
+ netlbl_domhsh_walk(&skip_bkt,
+ &skip_chain,
+ netlbl_mgmt_listall_cb,
+ &cb_arg);
+
+ cb->args[0] = skip_bkt;
+ cb->args[1] = skip_chain;
+ return skb->len;
+}
+
+/**
+ * netlbl_mgmt_adddef - Handle an ADDDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADDDEF message and respond accordingly. Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -EINVAL;
+ struct netlbl_dom_map *entry = NULL;
+ u32 tmp_val;
+ struct netlbl_audit audit_info;
+
+ if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
+ goto adddef_failure;
+
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ ret_val = -ENOMEM;
+ goto adddef_failure;
+ }
+ entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = netlbl_domhsh_add_default(entry, &audit_info);
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (!info->attrs[NLBL_MGMT_A_CV4DOI])
+ goto adddef_failure;
+
+ tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
+ /* We should be holding a rcu_read_lock() here while we hold
+ * the result but since the entry will always be deleted when
+ * the CIPSO DOI is deleted we aren't going to keep the
+ * lock. */
+ rcu_read_lock();
+ entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+ if (entry->type_def.cipsov4 == NULL) {
+ rcu_read_unlock();
+ goto adddef_failure;
+ }
+ ret_val = netlbl_domhsh_add_default(entry, &audit_info);
+ rcu_read_unlock();
+ break;
+ default:
+ goto adddef_failure;
+ }
+ if (ret_val != 0)
+ goto adddef_failure;
+
+ return 0;
+
+adddef_failure:
+ kfree(entry);
+ return ret_val;
+}
+
+/**
+ * netlbl_mgmt_removedef - Handle a REMOVEDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVEDEF message and remove the default domain
+ * mapping. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlbl_audit audit_info;
+
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ return netlbl_domhsh_remove_default(&audit_info);
+}
+
+/**
+ * netlbl_mgmt_listdef - Handle a LISTDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LISTDEF message and dumps the default domain
+ * mapping in a form suitable for use in a kernel generated LISTDEF message.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -ENOMEM;
+ struct sk_buff *ans_skb = NULL;
+ void *data;
+ struct netlbl_dom_map *entry;
+
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (ans_skb == NULL)
+ return -ENOMEM;
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_mgmt_gnl_family.id,
+ 0,
+ NLBL_MGMT_C_LISTDEF);
+ if (data == NULL)
+ goto listdef_failure;
+
+ rcu_read_lock();
+ entry = netlbl_domhsh_getentry(NULL);
+ if (entry == NULL) {
+ ret_val = -ENOENT;
+ goto listdef_failure_lock;
+ }
+ ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+ if (ret_val != 0)
+ goto listdef_failure_lock;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_MGMT_A_CV4DOI,
+ entry->type_def.cipsov4->doi);
+ if (ret_val != 0)
+ goto listdef_failure_lock;
+ break;
+ }
+ rcu_read_unlock();
+
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ if (ret_val != 0)
+ goto listdef_failure;
+ return 0;
+
+listdef_failure_lock:
+ rcu_read_unlock();
+listdef_failure:
+ kfree_skb(ans_skb);
+ return ret_val;
+}
+
+/**
+ * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response
+ * @skb: the skb to write to
+ * @seq: the NETLINK sequence number
+ * @cb: the NETLINK callback
+ * @protocol: the NetLabel protocol to use in the message
+ *
+ * Description:
+ * This function is to be used in conjunction with netlbl_mgmt_protocols() to
+ * answer a application's PROTOCOLS message. Returns the size of the message
+ * on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ u32 protocol)
+{
+ int ret_val = -ENOMEM;
+ void *data;
+
+ data = netlbl_netlink_hdr_put(skb,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ netlbl_mgmt_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_MGMT_C_PROTOCOLS);
+ if (data == NULL)
+ goto protocols_cb_failure;
+
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol);
+ if (ret_val != 0)
+ goto protocols_cb_failure;
+
+ return genlmsg_end(skb, data);
+
+protocols_cb_failure:
+ genlmsg_cancel(skb, data);
+ return ret_val;
+}
+
+/**
+ * netlbl_mgmt_protocols - Handle a PROTOCOLS message
+ * @skb: the NETLINK buffer
+ * @cb: the NETLINK callback
+ *
+ * Description:
+ * Process a user generated PROTOCOLS message and respond accordingly.
+ *
+ */
+static int netlbl_mgmt_protocols(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ u32 protos_sent = cb->args[0];
+
+ if (protos_sent == 0) {
+ if (netlbl_mgmt_protocols_cb(skb,
+ cb,
+ NETLBL_NLTYPE_UNLABELED) < 0)
+ goto protocols_return;
+ protos_sent++;
+ }
+ if (protos_sent == 1) {
+ if (netlbl_mgmt_protocols_cb(skb,
+ cb,
+ NETLBL_NLTYPE_CIPSOV4) < 0)
+ goto protocols_return;
+ protos_sent++;
+ }
+
+protocols_return:
+ cb->args[0] = protos_sent;
+ return skb->len;
+}
+
+/**
+ * netlbl_mgmt_version - Handle a VERSION message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated VERSION message and respond accordingly. Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -ENOMEM;
+ struct sk_buff *ans_skb = NULL;
+ void *data;
+
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (ans_skb == NULL)
+ return -ENOMEM;
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_mgmt_gnl_family.id,
+ 0,
+ NLBL_MGMT_C_VERSION);
+ if (data == NULL)
+ goto version_failure;
+
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_MGMT_A_VERSION,
+ NETLBL_PROTO_VERSION);
+ if (ret_val != 0)
+ goto version_failure;
+
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ if (ret_val != 0)
+ goto version_failure;
+ return 0;
+
+version_failure:
+ kfree_skb(ans_skb);
+ return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_mgmt_genl_c_add = {
+ .cmd = NLBL_MGMT_C_ADD,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_add,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_remove = {
+ .cmd = NLBL_MGMT_C_REMOVE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_remove,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_listall = {
+ .cmd = NLBL_MGMT_C_LISTALL,
+ .flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_mgmt_listall,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_adddef = {
+ .cmd = NLBL_MGMT_C_ADDDEF,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_adddef,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_removedef = {
+ .cmd = NLBL_MGMT_C_REMOVEDEF,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_removedef,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_listdef = {
+ .cmd = NLBL_MGMT_C_LISTDEF,
+ .flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_listdef,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_protocols = {
+ .cmd = NLBL_MGMT_C_PROTOCOLS,
+ .flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_mgmt_protocols,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_version = {
+ .cmd = NLBL_MGMT_C_VERSION,
+ .flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = netlbl_mgmt_version,
+ .dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_mgmt_genl_init - Register the NetLabel management component
+ *
+ * Description:
+ * Register the NetLabel management component with the Generic NETLINK
+ * mechanism. Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_mgmt_genl_init(void)
+{
+ int ret_val;
+
+ ret_val = genl_register_family(&netlbl_mgmt_gnl_family);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_add);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_remove);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_listall);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_adddef);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_removedef);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_listdef);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_protocols);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+ &netlbl_mgmt_genl_c_version);
+ if (ret_val != 0)
+ return ret_val;
+
+ return 0;
+}
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
new file mode 100644
index 000000000000..3642d3bfc8eb
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.h
@@ -0,0 +1,171 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_MGMT_H
+#define _NETLABEL_MGMT_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the management interface.
+ *
+ * o ADD:
+ * Sent by an application to add a domain mapping to the NetLabel system.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_DOMAIN
+ * NLBL_MGMT_A_PROTOCOL
+ *
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *
+ * NLBL_MGMT_A_CV4DOI
+ *
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *
+ * o REMOVE:
+ * Sent by an application to remove a domain mapping from the NetLabel
+ * system.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_DOMAIN
+ *
+ * o LISTALL:
+ * This message can be sent either from an application or by the kernel in
+ * response to an application generated LISTALL message. When sent by an
+ * application there is no payload and the NLM_F_DUMP flag should be set.
+ * The kernel should respond with a series of the following messages.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_DOMAIN
+ * NLBL_MGMT_A_PROTOCOL
+ *
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *
+ * NLBL_MGMT_A_CV4DOI
+ *
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *
+ * o ADDDEF:
+ * Sent by an application to set the default domain mapping for the NetLabel
+ * system.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_PROTOCOL
+ *
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *
+ * NLBL_MGMT_A_CV4DOI
+ *
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *
+ * o REMOVEDEF:
+ * Sent by an application to remove the default domain mapping from the
+ * NetLabel system, there is no payload.
+ *
+ * o LISTDEF:
+ * This message can be sent either from an application or by the kernel in
+ * response to an application generated LISTDEF message. When sent by an
+ * application there is no payload. On success the kernel should send a
+ * response using the following format.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_PROTOCOL
+ *
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *
+ * NLBL_MGMT_A_CV4DOI
+ *
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *
+ * o PROTOCOLS:
+ * Sent by an application to request a list of configured NetLabel protocols
+ * in the kernel. When sent by an application there is no payload and the
+ * NLM_F_DUMP flag should be set. The kernel should respond with a series of
+ * the following messages.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_PROTOCOL
+ *
+ * o VERSION:
+ * Sent by an application to request the NetLabel version. When sent by an
+ * application there is no payload. This message type is also used by the
+ * kernel to respond to an VERSION request.
+ *
+ * Required attributes:
+ *
+ * NLBL_MGMT_A_VERSION
+ *
+ */
+
+/* NetLabel Management commands */
+enum {
+ NLBL_MGMT_C_UNSPEC,
+ NLBL_MGMT_C_ADD,
+ NLBL_MGMT_C_REMOVE,
+ NLBL_MGMT_C_LISTALL,
+ NLBL_MGMT_C_ADDDEF,
+ NLBL_MGMT_C_REMOVEDEF,
+ NLBL_MGMT_C_LISTDEF,
+ NLBL_MGMT_C_PROTOCOLS,
+ NLBL_MGMT_C_VERSION,
+ __NLBL_MGMT_C_MAX,
+};
+#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
+
+/* NetLabel Management attributes */
+enum {
+ NLBL_MGMT_A_UNSPEC,
+ NLBL_MGMT_A_DOMAIN,
+ /* (NLA_NUL_STRING)
+ * the NULL terminated LSM domain string */
+ NLBL_MGMT_A_PROTOCOL,
+ /* (NLA_U32)
+ * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */
+ NLBL_MGMT_A_VERSION,
+ /* (NLA_U32)
+ * the NetLabel protocol version number (defined by
+ * NETLBL_PROTO_VERSION) */
+ NLBL_MGMT_A_CV4DOI,
+ /* (NLA_U32)
+ * the CIPSOv4 DOI value */
+ __NLBL_MGMT_A_MAX,
+};
+#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_mgmt_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
new file mode 100644
index 000000000000..1833ad233b39
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -0,0 +1,280 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system. The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_user.h"
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+
+/* Accept unlabeled packets flag */
+static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_unlabel_gnl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_UNLABELED_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_UNLABEL_A_MAX,
+};
+
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
+ [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
+};
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag
+ * @value: desired value
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Set the value of the unlabeled accept flag to @value.
+ *
+ */
+static void netlbl_unlabel_acceptflg_set(u8 value,
+ struct netlbl_audit *audit_info)
+{
+ struct audit_buffer *audit_buf;
+ u8 old_val;
+
+ old_val = atomic_read(&netlabel_unlabel_accept_flg);
+ atomic_set(&netlabel_unlabel_accept_flg, value);
+
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW,
+ audit_info);
+ audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val);
+ audit_log_end(audit_buf);
+}
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_unlabel_accept - Handle an ACCEPT message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ACCEPT message and set the accept flag accordingly.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
+{
+ u8 value;
+ struct netlbl_audit audit_info;
+
+ if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
+ value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
+ if (value == 1 || value == 0) {
+ netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_unlabel_acceptflg_set(value, &audit_info);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * netlbl_unlabel_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond with the current status.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret_val = -EINVAL;
+ struct sk_buff *ans_skb;
+ void *data;
+
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (ans_skb == NULL)
+ goto list_failure;
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_unlabel_gnl_family.id,
+ 0,
+ NLBL_UNLABEL_C_LIST);
+ if (data == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure;
+ }
+
+ ret_val = nla_put_u8(ans_skb,
+ NLBL_UNLABEL_A_ACPTFLG,
+ atomic_read(&netlabel_unlabel_accept_flg));
+ if (ret_val != 0)
+ goto list_failure;
+
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ if (ret_val != 0)
+ goto list_failure;
+ return 0;
+
+list_failure:
+ kfree(ans_skb);
+ return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_unlabel_genl_c_accept = {
+ .cmd = NLBL_UNLABEL_C_ACCEPT,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_accept,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_list = {
+ .cmd = NLBL_UNLABEL_C_LIST,
+ .flags = 0,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_list,
+ .dumpit = NULL,
+};
+
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component
+ *
+ * Description:
+ * Register the unlabeled packet NetLabel component with the Generic NETLINK
+ * mechanism. Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_unlabel_genl_init(void)
+{
+ int ret_val;
+
+ ret_val = genl_register_family(&netlbl_unlabel_gnl_family);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_accept);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_list);
+ if (ret_val != 0)
+ return ret_val;
+
+ return 0;
+}
+
+/*
+ * NetLabel KAPI Hooks
+ */
+
+/**
+ * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Determine the security attributes, if any, for an unlabled packet and return
+ * them in @secattr. Returns zero on success and negative values on failure.
+ *
+ */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
+{
+ if (atomic_read(&netlabel_unlabel_accept_flg) == 1)
+ return netlbl_secattr_init(secattr);
+
+ return -ENOMSG;
+}
+
+/**
+ * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
+ *
+ * Description:
+ * Set the default NetLabel configuration to allow incoming unlabeled packets
+ * and to send unlabeled network traffic by default.
+ *
+ */
+int netlbl_unlabel_defconf(void)
+{
+ int ret_val;
+ struct netlbl_dom_map *entry;
+ struct netlbl_audit audit_info;
+
+ /* Only the kernel is allowed to call this function and the only time
+ * it is called is at bootup before the audit subsystem is reporting
+ * messages so don't worry to much about these values. */
+ security_task_getsecid(current, &audit_info.secid);
+ audit_info.loginuid = 0;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL)
+ return -ENOMEM;
+ entry->type = NETLBL_NLTYPE_UNLABELED;
+ ret_val = netlbl_domhsh_add_default(entry, &audit_info);
+ if (ret_val != 0)
+ return ret_val;
+
+ netlbl_unlabel_acceptflg_set(1, &audit_info);
+
+ return 0;
+}
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
new file mode 100644
index 000000000000..c2917fbb42cf
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -0,0 +1,89 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system. The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_UNLABELED_H
+#define _NETLABEL_UNLABELED_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the Unlabeled subsystem.
+ *
+ * o ACCEPT
+ * This message is sent from an application to specify if the kernel should
+ * allow unlabled packets to pass if they do not match any of the static
+ * mappings defined in the unlabeled module.
+ *
+ * Required attributes:
+ *
+ * NLBL_UNLABEL_A_ACPTFLG
+ *
+ * o LIST
+ * This message can be sent either from an application or by the kernel in
+ * response to an application generated LIST message. When sent by an
+ * application there is no payload. The kernel should respond to a LIST
+ * message with a LIST message on success.
+ *
+ * Required attributes:
+ *
+ * NLBL_UNLABEL_A_ACPTFLG
+ *
+ */
+
+/* NetLabel Unlabeled commands */
+enum {
+ NLBL_UNLABEL_C_UNSPEC,
+ NLBL_UNLABEL_C_ACCEPT,
+ NLBL_UNLABEL_C_LIST,
+ __NLBL_UNLABEL_C_MAX,
+};
+#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
+
+/* NetLabel Unlabeled attributes */
+enum {
+ NLBL_UNLABEL_A_UNSPEC,
+ NLBL_UNLABEL_A_ACPTFLG,
+ /* (NLA_U8)
+ * if true then unlabeled packets are allowed to pass, else unlabeled
+ * packets are rejected */
+ __NLBL_UNLABEL_A_MAX,
+};
+#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_unlabel_genl_init(void);
+
+/* Process Unlabeled incoming network packets */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
+
+/* Set the default configuration to allow Unlabeled packets */
+int netlbl_unlabel_defconf(void);
+
+#endif
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
new file mode 100644
index 000000000000..98a416381e61
--- /dev/null
+++ b/net/netlabel/netlabel_user.c
@@ -0,0 +1,117 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <linux/audit.h>
+#include <linux/tty.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_cipso_v4.h"
+#include "netlabel_user.h"
+
+/*
+ * NetLabel NETLINK Setup Functions
+ */
+
+/**
+ * netlbl_netlink_init - Initialize the NETLINK communication channel
+ *
+ * Description:
+ * Call out to the NetLabel components so they can register their families and
+ * commands with the Generic NETLINK mechanism. Returns zero on success and
+ * non-zero on failure.
+ *
+ */
+int netlbl_netlink_init(void)
+{
+ int ret_val;
+
+ ret_val = netlbl_mgmt_genl_init();
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = netlbl_cipsov4_genl_init();
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = netlbl_unlabel_genl_init();
+ if (ret_val != 0)
+ return ret_val;
+
+ return 0;
+}
+
+/*
+ * NetLabel Audit Functions
+ */
+
+/**
+ * netlbl_audit_start_common - Start an audit message
+ * @type: audit message type
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Start an audit message using the type specified in @type and fill the audit
+ * message with some fields common to all NetLabel audit messages. Returns
+ * a pointer to the audit buffer on success, NULL on failure.
+ *
+ */
+struct audit_buffer *netlbl_audit_start_common(int type,
+ struct netlbl_audit *audit_info)
+{
+ struct audit_context *audit_ctx = current->audit_context;
+ struct audit_buffer *audit_buf;
+ char *secctx;
+ u32 secctx_len;
+
+ audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type);
+ if (audit_buf == NULL)
+ return NULL;
+
+ audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid);
+
+ if (audit_info->secid != 0 &&
+ security_secid_to_secctx(audit_info->secid,
+ &secctx,
+ &secctx_len) == 0)
+ audit_log_format(audit_buf, " subj=%s", secctx);
+
+ return audit_buf;
+}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
new file mode 100644
index 000000000000..47967ef32964
--- /dev/null
+++ b/net/netlabel/netlabel_user.h
@@ -0,0 +1,96 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system. The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_USER_H
+#define _NETLABEL_USER_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/capability.h>
+#include <linux/audit.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+
+/* NetLabel NETLINK helper functions */
+
+/**
+ * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the receipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet. Returns a pointer to the start of the payload buffer
+ * on success or NULL on failure.
+ *
+ */
+static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
+ u32 pid,
+ u32 seq,
+ int type,
+ int flags,
+ u8 cmd)
+{
+ return genlmsg_put(skb,
+ pid,
+ seq,
+ type,
+ 0,
+ flags,
+ cmd,
+ NETLBL_PROTO_VERSION);
+}
+
+/**
+ * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
+ * @skb: the packet
+ * @audit_info: NetLabel audit information
+ */
+static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
+ struct netlbl_audit *audit_info)
+{
+ audit_info->secid = NETLINK_CB(skb).sid;
+ audit_info->loginuid = NETLINK_CB(skb).loginuid;
+}
+
+/* NetLabel NETLINK I/O functions */
+
+int netlbl_netlink_init(void);
+
+/* NetLabel Audit Functions */
+
+struct audit_buffer *netlbl_audit_start_common(int type,
+ struct netlbl_audit *audit_info);
+
+#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3862e73d14d7..d56e0d21f919 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -21,7 +21,6 @@
* mandatory if CONFIG_NET=y these days
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/capability.h>
@@ -157,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
static void netlink_table_grab(void)
{
- write_lock_bh(&nl_table_lock);
+ write_lock_irq(&nl_table_lock);
if (atomic_read(&nl_table_users)) {
DECLARE_WAITQUEUE(wait, current);
@@ -167,9 +166,9 @@ static void netlink_table_grab(void)
set_current_state(TASK_UNINTERRUPTIBLE);
if (atomic_read(&nl_table_users) == 0)
break;
- write_unlock_bh(&nl_table_lock);
+ write_unlock_irq(&nl_table_lock);
schedule();
- write_lock_bh(&nl_table_lock);
+ write_lock_irq(&nl_table_lock);
}
__set_current_state(TASK_RUNNING);
@@ -179,7 +178,7 @@ static void netlink_table_grab(void)
static __inline__ void netlink_table_ungrab(void)
{
- write_unlock_bh(&nl_table_lock);
+ write_unlock_irq(&nl_table_lock);
wake_up(&nl_table_wait);
}
@@ -563,10 +562,9 @@ static int netlink_alloc_groups(struct sock *sk)
if (err)
return err;
- nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL);
+ nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
if (nlk->groups == NULL)
return -ENOMEM;
- memset(nlk->groups, 0, NLGRPSZ(groups));
nlk->ngroups = groups;
return 0;
}
@@ -1149,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (len > sk->sk_sndbuf - 32)
goto out;
err = -ENOBUFS;
- skb = alloc_skb(len, GFP_KERNEL);
+ skb = nlmsg_new(len, GFP_KERNEL);
if (skb==NULL)
goto out;
@@ -1275,8 +1273,7 @@ netlink_kernel_create(int unit, unsigned int groups,
struct netlink_sock *nlk;
unsigned long *listeners = NULL;
- if (!nl_table)
- return NULL;
+ BUG_ON(!nl_table);
if (unit<0 || unit>=MAX_LINKS)
return NULL;
@@ -1344,19 +1341,18 @@ static int netlink_dump(struct sock *sk)
struct netlink_callback *cb;
struct sk_buff *skb;
struct nlmsghdr *nlh;
- int len;
+ int len, err = -ENOBUFS;
skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ goto errout;
spin_lock(&nlk->cb_lock);
cb = nlk->cb;
if (cb == NULL) {
- spin_unlock(&nlk->cb_lock);
- kfree_skb(skb);
- return -EINVAL;
+ err = -EINVAL;
+ goto errout_skb;
}
len = cb->dump(skb, cb);
@@ -1368,8 +1364,12 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
+ if (!nlh)
+ goto errout_skb;
+
+ memcpy(nlmsg_data(nlh), &len, sizeof(len));
+
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
@@ -1381,8 +1381,11 @@ static int netlink_dump(struct sock *sk)
netlink_destroy_callback(cb);
return 0;
-nlmsg_failure:
- return -ENOBUFS;
+errout_skb:
+ spin_unlock(&nlk->cb_lock);
+ kfree_skb(skb);
+errout:
+ return err;
}
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1394,11 +1397,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct sock *sk;
struct netlink_sock *nlk;
- cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
if (cb == NULL)
return -ENOBUFS;
- memset(cb, 0, sizeof(*cb));
cb->dump = dump;
cb->done = done;
cb->nlh = nlh;
@@ -1435,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
int size;
if (err == 0)
- size = NLMSG_SPACE(sizeof(struct nlmsgerr));
+ size = nlmsg_total_size(sizeof(*errmsg));
else
- size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
+ size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
- skb = alloc_skb(size, GFP_KERNEL);
+ skb = nlmsg_new(size, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -1455,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
- errmsg = NLMSG_DATA(rep);
+ errmsg = nlmsg_data(rep);
errmsg->error = err;
- memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
+ memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *, int *))
{
- unsigned int total_len;
struct nlmsghdr *nlh;
int err;
@@ -1474,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
return 0;
- total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
-
if (cb(skb, nlh, &err) < 0) {
/* Not an error, but we have to interrupt processing
* here. Note: that in this case we do not pull
@@ -1487,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
} else if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
- skb_pull(skb, total_len);
+ netlink_queue_skip(nlh, skb);
}
return 0;
@@ -1550,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
skb_pull(skb, msglen);
}
+/**
+ * nlmsg_notify - send a notification netlink message
+ * @sk: netlink socket to use
+ * @skb: notification message
+ * @pid: destination netlink pid for reports or 0
+ * @group: destination multicast group or 0
+ * @report: 1 to report back, 0 to disable
+ * @flags: allocation flags
+ */
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+ unsigned int group, int report, gfp_t flags)
+{
+ int err = 0;
+
+ if (group) {
+ int exclude_pid = 0;
+
+ if (report) {
+ atomic_inc(&skb->users);
+ exclude_pid = pid;
+ }
+
+ /* errors reported via destination sk->sk_err */
+ nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+ }
+
+ if (report)
+ err = nlmsg_unicast(sk, skb, pid);
+
+ return err;
+}
+
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
int link;
@@ -1669,7 +1700,7 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
struct nl_seq_iter *iter;
int err;
- iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
@@ -1679,7 +1710,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
return err;
}
- memset(iter, 0, sizeof(*iter));
seq = file->private_data;
seq->private = iter;
return 0;
@@ -1732,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = {
.owner = THIS_MODULE, /* for consistency 8) */
};
-extern void netlink_skb_parms_too_large(void);
-
static int __init netlink_proto_init(void)
{
struct sk_buff *dummy_skb;
@@ -1745,17 +1773,11 @@ static int __init netlink_proto_init(void)
if (err != 0)
goto out;
- if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
- netlink_skb_parms_too_large();
-
- nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
- if (!nl_table) {
-enomem:
- printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
- return -ENOMEM;
- }
+ BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
- memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
+ nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
+ if (!nl_table)
+ goto panic;
if (num_physpages >= (128 * 1024))
max = num_physpages >> (21 - PAGE_SHIFT);
@@ -1775,7 +1797,7 @@ enomem:
nl_pid_hash_free(nl_table[i].hash.table,
1 * sizeof(*hash->table));
kfree(nl_table);
- goto enomem;
+ goto panic;
}
memset(hash->table, 0, 1 * sizeof(*hash->table));
hash->max_shift = order;
@@ -1792,6 +1814,8 @@ enomem:
rtnetlink_init();
out:
return err;
+panic:
+ panic("netlink_init: Cannot allocate nl_table\n");
}
core_initcall(netlink_proto_init);
@@ -1807,4 +1831,4 @@ EXPORT_SYMBOL(netlink_set_err);
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_unicast);
EXPORT_SYMBOL(netlink_unregister_notifier);
-
+EXPORT_SYMBOL(nlmsg_notify);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index fffef4ab276f..004139557e09 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -5,7 +5,6 @@
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -21,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
[NLA_U16] = sizeof(u16),
[NLA_U32] = sizeof(u32),
[NLA_U64] = sizeof(u64),
- [NLA_STRING] = 1,
[NLA_NESTED] = NLA_HDRLEN,
};
@@ -29,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype,
struct nla_policy *policy)
{
struct nla_policy *pt;
- int minlen = 0;
+ int minlen = 0, attrlen = nla_len(nla);
if (nla->nla_type <= 0 || nla->nla_type > maxtype)
return 0;
@@ -38,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype,
BUG_ON(pt->type > NLA_TYPE_MAX);
- if (pt->minlen)
- minlen = pt->minlen;
- else if (pt->type != NLA_UNSPEC)
- minlen = nla_attr_minlen[pt->type];
+ switch (pt->type) {
+ case NLA_FLAG:
+ if (attrlen > 0)
+ return -ERANGE;
+ break;
- if (pt->type == NLA_FLAG && nla_len(nla) > 0)
- return -ERANGE;
+ case NLA_NUL_STRING:
+ if (pt->len)
+ minlen = min_t(int, attrlen, pt->len + 1);
+ else
+ minlen = attrlen;
- if (nla_len(nla) < minlen)
- return -ERANGE;
+ if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
+ return -EINVAL;
+ /* fall through */
+
+ case NLA_STRING:
+ if (attrlen < 1)
+ return -ERANGE;
+
+ if (pt->len) {
+ char *buf = nla_data(nla);
+
+ if (buf[attrlen - 1] == '\0')
+ attrlen--;
+
+ if (attrlen > pt->len)
+ return -ERANGE;
+ }
+ break;
+
+ default:
+ if (pt->len)
+ minlen = pt->len;
+ else if (pt->type != NLA_UNSPEC)
+ minlen = nla_attr_minlen[pt->type];
+
+ if (attrlen < minlen)
+ return -ERANGE;
+ }
return 0;
}
@@ -256,6 +284,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
}
/**
+ * __nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the payload.
+ */
+void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+ void *start;
+
+ start = skb_put(skb, NLA_ALIGN(attrlen));
+ memset(start, 0, NLA_ALIGN(attrlen));
+
+ return start;
+}
+
+/**
* nla_reserve - reserve room for attribute on the skb
* @skb: socket buffer to reserve room on
* @attrtype: attribute type
@@ -276,6 +324,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
}
/**
+ * nla_reserve - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @len: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+ if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+ return NULL;
+
+ return __nla_reserve_nohdr(skb, attrlen);
+}
+
+/**
* __nla_put - Add a netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
@@ -294,6 +360,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
memcpy(nla_data(nla), data, attrlen);
}
+/**
+ * __nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute payload.
+ */
+void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+ void *start;
+
+ start = __nla_reserve_nohdr(skb, attrlen);
+ memcpy(start, data, attrlen);
+}
/**
* nla_put - Add a netlink attribute to a socket buffer
@@ -314,15 +396,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
return 0;
}
+/**
+ * nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -1 if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+ if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+ return -1;
+
+ __nla_put_nohdr(skb, attrlen, data);
+ return 0;
+}
EXPORT_SYMBOL(nla_validate);
EXPORT_SYMBOL(nla_parse);
EXPORT_SYMBOL(nla_find);
EXPORT_SYMBOL(nla_strlcpy);
EXPORT_SYMBOL(__nla_reserve);
+EXPORT_SYMBOL(__nla_reserve_nohdr);
EXPORT_SYMBOL(nla_reserve);
+EXPORT_SYMBOL(nla_reserve_nohdr);
EXPORT_SYMBOL(__nla_put);
+EXPORT_SYMBOL(__nla_put_nohdr);
EXPORT_SYMBOL(nla_put);
+EXPORT_SYMBOL(nla_put_nohdr);
EXPORT_SYMBOL(nla_memcpy);
EXPORT_SYMBOL(nla_memcmp);
EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f329b72578f5..49bc2db7982b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -5,7 +5,6 @@
* Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -320,7 +319,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) {
+ if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
err = -EPERM;
goto errout;
}
@@ -388,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len)
static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
+ struct nlattr *nla_ops;
+ struct genl_ops *ops;
void *hdr;
+ int idx = 1;
hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
family->version);
@@ -397,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
+ NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version);
+ NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize);
+ NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr);
+
+ nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
+ if (nla_ops == NULL)
+ goto nla_put_failure;
+
+ list_for_each_entry(ops, &family->ops_list, ops_list) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, idx++);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
+ NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
+
+ if (ops->policy)
+ NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY);
+
+ if (ops->doit)
+ NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT);
+
+ if (ops->dumpit)
+ NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT);
+
+ nla_nest_end(skb, nest);
+ }
+
+ nla_nest_end(skb, nla_ops);
return genlmsg_end(skb, hdr);
@@ -412,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
int chains_to_skip = cb->args[0];
int fams_to_skip = cb->args[1];
+ if (chains_to_skip != 0)
+ genl_lock();
+
for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
if (i < chains_to_skip)
continue;
@@ -429,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
}
errout:
+ if (chains_to_skip != 0)
+ genl_unlock();
+
cb->args[0] = i;
cb->args[1] = n;
@@ -441,7 +480,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
struct sk_buff *skb;
int err;
- skb = nlmsg_new(NLMSG_GOODSIZE);
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
return ERR_PTR(-ENOBUFS);
@@ -456,7 +495,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
[CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 },
- [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING },
+ [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING,
+ .len = GENL_NAMSIZ - 1 },
};
static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
@@ -471,12 +511,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
- char name[GENL_NAMSIZ];
-
- if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
- GENL_NAMSIZ) >= GENL_NAMSIZ)
- goto errout;
+ char *name;
+ name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]);
res = genl_family_find_byname(name);
}
@@ -511,7 +548,7 @@ static int genl_ctrl_event(int event, void *data)
if (IS_ERR(msg))
return PTR_ERR(msg);
- genlmsg_multicast(msg, 0, GENL_ID_CTRL);
+ genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL);
break;
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 3669cb953e6e..1d50f801f181 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -8,7 +8,6 @@
* Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
* Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/capability.h>
@@ -67,6 +66,14 @@ static DEFINE_SPINLOCK(nr_list_lock);
static const struct proto_ops nr_proto_ops;
/*
+ * NETROM network devices are virtual network devices encapsulating NETROM
+ * frames into AX.25 which will be sent through an AX.25 device, so form a
+ * special "super class" of normal net devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key nr_netdev_xmit_lock_key;
+
+/*
* Socket removal during an interrupt is now safe.
*/
static void nr_remove_socket(struct sock *sk)
@@ -801,7 +808,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
/* Now attach up the new socket */
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsock->sk = newsk;
out:
@@ -986,19 +993,19 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
nr_make->vr = 0;
nr_make->vl = 0;
nr_make->state = NR_STATE_3;
- sk->sk_ack_backlog++;
-
- nr_insert_socket(make);
-
+ sk_acceptq_added(sk);
skb_queue_head(&sk->sk_receive_queue, skb);
- nr_start_heartbeat(make);
- nr_start_idletimer(make);
-
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, skb->len);
bh_unlock_sock(sk);
+
+ nr_insert_socket(make);
+
+ nr_start_heartbeat(make);
+ nr_start_idletimer(make);
+
return 1;
}
@@ -1383,14 +1390,12 @@ static int __init nr_proto_init(void)
return -1;
}
- dev_nr = kmalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+ dev_nr = kzalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL);
if (dev_nr == NULL) {
printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
return -1;
}
- memset(dev_nr, 0x00, nr_ndevs * sizeof(struct net_device *));
-
for (i = 0; i < nr_ndevs; i++) {
char name[IFNAMSIZ];
struct net_device *dev;
@@ -1408,6 +1413,7 @@ static int __init nr_proto_init(void)
free_netdev(dev);
goto fail;
}
+ lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key);
dev_nr[i] = dev;
}
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 621e5586ab03..9b8eb54971ab 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -6,7 +6,6 @@
*
* Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index b3b9097c87c7..c11737f472d6 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -725,15 +725,17 @@ void nr_link_failed(ax25_cb *ax25, int reason)
struct nr_node *nr_node = NULL;
spin_lock_bh(&nr_neigh_list_lock);
- nr_neigh_for_each(s, node, &nr_neigh_list)
+ nr_neigh_for_each(s, node, &nr_neigh_list) {
if (s->ax25 == ax25) {
nr_neigh_hold(s);
nr_neigh = s;
break;
}
+ }
spin_unlock_bh(&nr_neigh_list_lock);
- if (nr_neigh == NULL) return;
+ if (nr_neigh == NULL)
+ return;
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
@@ -743,11 +745,13 @@ void nr_link_failed(ax25_cb *ax25, int reason)
return;
}
spin_lock_bh(&nr_node_list_lock);
- nr_node_for_each(nr_node, node, &nr_node_list)
+ nr_node_for_each(nr_node, node, &nr_node_list) {
nr_node_lock(nr_node);
- if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh)
+ if (nr_node->which < nr_node->count &&
+ nr_node->routes[nr_node->which].neighbour == nr_neigh)
nr_node->which++;
nr_node_unlock(nr_node);
+ }
spin_unlock_bh(&nr_node_list_lock);
nr_neigh_put(nr_neigh);
}
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 75b72d389ba9..ddba1c144260 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -138,8 +138,8 @@ static void nr_heartbeat_expiry(unsigned long param)
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
sock_hold(sk);
- nr_destroy_socket(sk);
bh_unlock_sock(sk);
+ nr_destroy_socket(sk);
sock_put(sk);
return;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9db7dbdb16e6..f4ccb90e6739 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -49,7 +49,6 @@
*
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -428,21 +427,24 @@ out_unlock:
}
#endif
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline int run_filter(struct sk_buff *skb, struct sock *sk,
+ unsigned *snaplen)
{
struct sk_filter *filter;
+ int err = 0;
- bh_lock_sock(sk);
- filter = sk->sk_filter;
- /*
- * Our caller already checked that filter != NULL but we need to
- * verify that under bh_lock_sock() to be safe
- */
- if (likely(filter != NULL))
- res = sk_run_filter(skb, filter->insns, filter->len);
- bh_unlock_sock(sk);
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter != NULL) {
+ err = sk_run_filter(skb, filter->insns, filter->len);
+ if (!err)
+ err = -EPERM;
+ else if (*snaplen > err)
+ *snaplen = err;
+ }
+ rcu_read_unlock_bh();
- return res;
+ return err;
}
/*
@@ -492,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ if (run_filter(skb, sk, &snaplen) < 0)
+ goto drop_n_restore;
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
@@ -587,20 +584,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
skb_pull(skb, skb->nh.raw - skb->data);
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
}
}
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ if (run_filter(skb, sk, &snaplen) < 0)
+ goto drop_n_restore;
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
@@ -627,8 +619,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
if ((int)snaplen < 0)
snaplen = 0;
}
- if (snaplen > skb->len-skb->data_len)
- snaplen = skb->len-skb->data_len;
spin_lock(&sk->sk_receive_queue.lock);
h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
@@ -645,7 +635,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
status &= ~TP_STATUS_LOSING;
spin_unlock(&sk->sk_receive_queue.lock);
- memcpy((u8*)h + macoff, skb->data, snaplen);
+ skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
h->tp_len = skb->len;
h->tp_snaplen = snaplen;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 55564efccf11..08a542855654 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -10,7 +10,6 @@
* Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
*/
-#include <linux/config.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -68,6 +67,14 @@ static struct proto_ops rose_proto_ops;
ax25_address rose_callsign;
/*
+ * ROSE network devices are virtual network devices encapsulating ROSE
+ * frames into AX.25 which will be sent through an AX.25 device, so form a
+ * special "super class" of normal net devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key rose_netdev_xmit_lock_key;
+
+/*
* Convert a ROSE address into text.
*/
const char *rose2asc(const rose_address *addr)
@@ -753,7 +760,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
rose_insert_socket(sk); /* Finish the bind */
}
-
+rose_try_next_neigh:
rose->dest_addr = addr->srose_addr;
rose->dest_call = addr->srose_call;
rose->rand = ((long)rose & 0xFFFF) + rose->lci;
@@ -811,6 +818,11 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
}
if (sk->sk_state != TCP_ESTABLISHED) {
+ /* Try next neighbour */
+ rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
+ if (rose->neighbour)
+ goto rose_try_next_neigh;
+ /* No more neighbour */
sock->state = SS_UNCONNECTED;
return sock_error(sk); /* Always set at this point */
}
@@ -1486,14 +1498,13 @@ static int __init rose_proto_init(void)
rose_callsign = null_ax25_address;
- dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+ dev_rose = kzalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL);
if (dev_rose == NULL) {
printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n");
rc = -ENOMEM;
goto out_proto_unregister;
}
- memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*));
for (i = 0; i < rose_ndevs; i++) {
struct net_device *dev;
char name[IFNAMSIZ];
@@ -1512,6 +1523,7 @@ static int __init rose_proto_init(void)
free_netdev(dev);
goto fail;
}
+ lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key);
dev_rose[i] = dev;
}
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 2a1bf8e119e5..7c279e2659ec 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -6,7 +6,6 @@
*
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
@@ -60,6 +59,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
struct net_device_stats *stats = netdev_priv(dev);
unsigned char *bp = (unsigned char *)skb->data;
struct sk_buff *skbn;
+ unsigned int len;
#ifdef CONFIG_INET
if (arp_find(bp + 7, skb)) {
@@ -76,6 +76,8 @@ static int rose_rebuild_header(struct sk_buff *skb)
kfree_skb(skb);
+ len = skbn->len;
+
if (!rose_route_frame(skbn, NULL)) {
kfree_skb(skbn);
stats->tx_errors++;
@@ -83,7 +85,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
}
stats->tx_packets++;
- stats->tx_bytes += skbn->len;
+ stats->tx_bytes += len;
#endif
return 1;
}
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
index c4aeb7d40266..d07122b57e0d 100644
--- a/net/rxrpc/call.c
+++ b/net/rxrpc/call.c
@@ -1098,8 +1098,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
call->app_ready_seq = pmsg->seq;
call->app_ready_qty += pmsg->dsize;
- list_del_init(&pmsg->link);
- list_add_tail(&pmsg->link, &call->app_readyq);
+ list_move_tail(&pmsg->link, &call->app_readyq);
}
/* see if we've got the last packet yet */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index 0e0a4553499f..93d2c55ad2d5 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -58,13 +58,12 @@ static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
_enter("%p",peer);
/* allocate and initialise a connection record */
- conn = kmalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
+ conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
if (!conn) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
- memset(conn, 0, sizeof(struct rxrpc_connection));
atomic_set(&conn->usage, 1);
INIT_LIST_HEAD(&conn->link);
@@ -402,8 +401,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
/* move to graveyard queue */
_debug("burying connection: {%08x}", ntohl(conn->conn_id));
- list_del(&conn->link);
- list_add_tail(&conn->link, &peer->conn_graveyard);
+ list_move_tail(&conn->link, &peer->conn_graveyard);
rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
@@ -536,13 +534,12 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
return -EINVAL;
}
- msg = kmalloc(sizeof(struct rxrpc_message), alloc_flags);
+ msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
if (!msg) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
- memset(msg, 0, sizeof(*msg));
atomic_set(&msg->usage, 1);
INIT_LIST_HEAD(&msg->link);
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
index 1aadd026d354..cea4eb5e2497 100644
--- a/net/rxrpc/krxsecd.c
+++ b/net/rxrpc/krxsecd.c
@@ -160,8 +160,7 @@ void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
msg = list_entry(_p, struct rxrpc_message, link);
if (msg->trans == trans) {
- list_del(&msg->link);
- list_add_tail(&msg->link, &tmp);
+ list_move_tail(&msg->link, &tmp);
atomic_dec(&rxrpc_krxsecd_qcount);
}
}
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
index ed38f5b17c1b..8a275157a3bb 100644
--- a/net/rxrpc/peer.c
+++ b/net/rxrpc/peer.c
@@ -58,13 +58,12 @@ static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
_enter("%p,%08x", trans, ntohl(addr));
/* allocate and initialise a peer record */
- peer = kmalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
+ peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
if (!peer) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
- memset(peer, 0, sizeof(struct rxrpc_peer));
atomic_set(&peer->usage, 1);
INIT_LIST_HEAD(&peer->link);
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
index 56adf16fed0c..9896fd87a4d4 100644
--- a/net/rxrpc/rxrpc_syms.c
+++ b/net/rxrpc/rxrpc_syms.c
@@ -9,7 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <rxrpc/transport.h>
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index fbf98729c748..6374df7e77d1 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -9,7 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/module.h>
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index dbe6105e83a5..465efc86fccf 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -68,11 +68,10 @@ int rxrpc_create_transport(unsigned short port,
_enter("%hu", port);
- trans = kmalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
+ trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
if (!trans)
return -ENOMEM;
- memset(trans, 0, sizeof(struct rxrpc_transport));
atomic_set(&trans->usage, 1);
INIT_LIST_HEAD(&trans->services);
INIT_LIST_HEAD(&trans->link);
@@ -312,13 +311,12 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
_enter("");
- msg = kmalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
+ msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
if (!msg) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
- memset(msg, 0, sizeof(*msg));
atomic_set(&msg->usage, 1);
list_add_tail(&msg->link,msgq);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 13eeee582886..8298ea9ffe19 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -305,7 +305,7 @@ config NET_CLS_U32
tristate "Universal 32bit comparisons w/ hashing (U32)"
select NET_CLS
---help---
- Say Y here to be able to classify packetes using a universal
+ Say Y here to be able to classify packets using a universal
32bit pieces based comparison scheme.
To compile this code as a module, choose M here: the
@@ -485,7 +485,7 @@ config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
---help---
- Say Y here to be able to invoke iptables targets after succesful
+ Say Y here to be able to invoke iptables targets after successful
classification.
To compile this code as a module, choose M here: the
@@ -537,8 +537,8 @@ config NET_ESTIMATOR
---help---
Say Y here to allow using rate estimators to estimate the current
rate-of-flow for network devices, queues, etc. This module is
- automaticaly selected if needed but can be selected manually for
- statstical purposes.
+ automatically selected if needed but can be selected manually for
+ statistical purposes.
endif # NET_SCHED
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2ffa11c6e8de..835070e9169c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -14,7 +14,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -34,16 +33,230 @@
#include <net/sch_generic.h>
#include <net/act_api.h>
-#if 0 /* control */
-#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define DPRINTK(format, args...)
+void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+ unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+ struct tcf_common **p1p;
+
+ for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+ if (*p1p == p) {
+ write_lock_bh(hinfo->lock);
+ *p1p = p->tcfc_next;
+ write_unlock_bh(hinfo->lock);
+#ifdef CONFIG_NET_ESTIMATOR
+ gen_kill_estimator(&p->tcfc_bstats,
+ &p->tcfc_rate_est);
#endif
-#if 0 /* data */
-#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define D2PRINTK(format, args...)
+ kfree(p);
+ return;
+ }
+ }
+ BUG_TRAP(0);
+}
+EXPORT_SYMBOL(tcf_hash_destroy);
+
+int tcf_hash_release(struct tcf_common *p, int bind,
+ struct tcf_hashinfo *hinfo)
+{
+ int ret = 0;
+
+ if (p) {
+ if (bind)
+ p->tcfc_bindcnt--;
+
+ p->tcfc_refcnt--;
+ if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
+ tcf_hash_destroy(p, hinfo);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+EXPORT_SYMBOL(tcf_hash_release);
+
+static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tc_action *a, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p;
+ int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+ struct rtattr *r ;
+
+ read_lock(hinfo->lock);
+
+ s_i = cb->args[0];
+
+ for (i = 0; i < (hinfo->hmask + 1); i++) {
+ p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+ for (; p; p = p->tcfc_next) {
+ index++;
+ if (index < s_i)
+ continue;
+ a->priv = p;
+ a->order = n_i;
+ r = (struct rtattr*) skb->tail;
+ RTA_PUT(skb, a->order, 0, NULL);
+ err = tcf_action_dump_1(skb, a, 0, 0);
+ if (err < 0) {
+ index--;
+ skb_trim(skb, (u8*)r - skb->data);
+ goto done;
+ }
+ r->rta_len = skb->tail - (u8*)r;
+ n_i++;
+ if (n_i >= TCA_ACT_MAX_PRIO)
+ goto done;
+ }
+ }
+done:
+ read_unlock(hinfo->lock);
+ if (n_i)
+ cb->args[0] += n_i;
+ return n_i;
+
+rtattr_failure:
+ skb_trim(skb, (u8*)r - skb->data);
+ goto done;
+}
+
+static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p, *s_p;
+ struct rtattr *r ;
+ int i= 0, n_i = 0;
+
+ r = (struct rtattr*) skb->tail;
+ RTA_PUT(skb, a->order, 0, NULL);
+ RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+ for (i = 0; i < (hinfo->hmask + 1); i++) {
+ p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+ while (p != NULL) {
+ s_p = p->tcfc_next;
+ if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
+ module_put(a->ops->owner);
+ n_i++;
+ p = s_p;
+ }
+ }
+ RTA_PUT(skb, TCA_FCNT, 4, &n_i);
+ r->rta_len = skb->tail - (u8*)r;
+
+ return n_i;
+rtattr_failure:
+ skb_trim(skb, (u8*)r - skb->data);
+ return -EINVAL;
+}
+
+int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
+{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
+ if (type == RTM_DELACTION) {
+ return tcf_del_walker(skb, a, hinfo);
+ } else if (type == RTM_GETACTION) {
+ return tcf_dump_walker(skb, cb, a, hinfo);
+ } else {
+ printk("tcf_generic_walker: unknown action %d\n", type);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(tcf_generic_walker);
+
+struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p;
+
+ read_lock(hinfo->lock);
+ for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+ p = p->tcfc_next) {
+ if (p->tcfc_index == index)
+ break;
+ }
+ read_unlock(hinfo->lock);
+
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_lookup);
+
+u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+ u32 val = *idx_gen;
+
+ do {
+ if (++val == 0)
+ val = 1;
+ } while (tcf_hash_lookup(val, hinfo));
+
+ return (*idx_gen = val);
+}
+EXPORT_SYMBOL(tcf_hash_new_index);
+
+int tcf_hash_search(struct tc_action *a, u32 index)
+{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tcf_common *p = tcf_hash_lookup(index, hinfo);
+
+ if (p) {
+ a->priv = p;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(tcf_hash_search);
+
+struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
+ struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p = NULL;
+ if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+ if (bind) {
+ p->tcfc_bindcnt++;
+ p->tcfc_refcnt++;
+ }
+ a->priv = p;
+ }
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_check);
+
+struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+
+ if (unlikely(!p))
+ return p;
+ p->tcfc_refcnt = 1;
+ if (bind)
+ p->tcfc_bindcnt = 1;
+
+ spin_lock_init(&p->tcfc_lock);
+ p->tcfc_stats_lock = &p->tcfc_lock;
+ p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
+ p->tcfc_tm.install = jiffies;
+ p->tcfc_tm.lastuse = jiffies;
+#ifdef CONFIG_NET_ESTIMATOR
+ if (est)
+ gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+ p->tcfc_stats_lock, est);
#endif
+ a->priv = (void *) p;
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_create);
+
+void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+ unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+
+ write_lock_bh(hinfo->lock);
+ p->tcfc_next = hinfo->htab[h];
+ hinfo->htab[h] = p;
+ write_unlock_bh(hinfo->lock);
+}
+EXPORT_SYMBOL(tcf_hash_insert);
static struct tc_action_ops *act_base = NULL;
static DEFINE_RWLOCK(act_mod_lock);
@@ -156,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
- skb, skb->input_dev ? skb->input_dev->name : "xxx",
- skb->dev->name);
ret = TC_ACT_OK;
goto exec_done;
}
@@ -188,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind)
for (a = act; a; a = act) {
if (a->ops && a->ops->cleanup) {
- DPRINTK("tcf_action_destroy destroying %p next %p\n",
- a, a->next);
if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
module_put(a->ops->owner);
act = act->next;
@@ -251,15 +459,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
- goto rtattr_failure;
+ goto errout;
r->rta_len = skb->tail - (u8*)r;
}
return 0;
rtattr_failure:
+ err = -EINVAL;
+errout:
skb_trim(skb, b - skb->data);
- return -err;
+ return err;
}
struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
@@ -306,14 +516,14 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
goto err_mod;
}
#endif
+ *err = -ENOENT;
goto err_out;
}
*err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_KERNEL);
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
if (a == NULL)
goto err_mod;
- memset(a, 0, sizeof(*a));
/* backward compatibility for policer */
if (name == NULL)
@@ -330,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
if (*err != ACT_P_CREATED)
module_put(a_o->owner);
a->ops = a_o;
- DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
*err = 0;
return a;
@@ -391,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (compat_mode) {
if (a->type == TCA_OLD_COMPAT)
err = gnet_stats_start_copy_compat(skb, 0,
- TCA_STATS, TCA_XSTATS, h->stats_lock, &d);
+ TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d);
else
return 0;
} else
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
- h->stats_lock, &d);
+ h->tcf_stats_lock, &d);
if (err < 0)
goto errout;
@@ -405,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (a->ops->get_stats(skb, a) < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, &h->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
- gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
#endif
- gnet_stats_copy_queue(&d, &h->qstats) < 0)
+ gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
@@ -458,7 +667,6 @@ static int
act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
{
struct sk_buff *skb;
- int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -467,10 +675,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
kfree_skb(skb);
return -EINVAL;
}
- err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
- if (err > 0)
- err = 0;
- return err;
+
+ return rtnl_unicast(skb, pid);
}
static struct tc_action *
@@ -490,10 +696,9 @@ tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]);
*err = -ENOMEM;
- a = kmalloc(sizeof(struct tc_action), GFP_KERNEL);
+ a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
if (a == NULL)
return NULL;
- memset(a, 0, sizeof(struct tc_action));
*err = -EINVAL;
a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]);
@@ -529,12 +734,11 @@ static struct tc_action *create_a(int i)
{
struct tc_action *act;
- act = kmalloc(sizeof(*act), GFP_KERNEL);
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
if (act == NULL) {
printk("create_a: failed to alloc!\n");
return NULL;
}
- memset(act, 0, sizeof(*act));
act->order = i;
return act;
}
@@ -600,8 +804,8 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
return err;
rtattr_failure:
- module_put(a->ops->owner);
nlmsg_failure:
+ module_put(a->ops->owner);
err_out:
kfree_skb(skb);
kfree(a);
@@ -777,7 +981,7 @@ replay:
return ret;
}
-static char *
+static struct rtattr *
find_dump_kind(struct nlmsghdr *n)
{
struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
@@ -805,7 +1009,7 @@ find_dump_kind(struct nlmsghdr *n)
return NULL;
kind = tb2[TCA_ACT_KIND-1];
- return (char *) RTA_DATA(kind);
+ return kind;
}
static int
@@ -818,16 +1022,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action a;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
- char *kind = find_dump_kind(cb->nlh);
+ struct rtattr *kind = find_dump_kind(cb->nlh);
if (kind == NULL) {
printk("tc_dump_action: action bad kind\n");
return 0;
}
- a_o = tc_lookup_action_n(kind);
+ a_o = tc_lookup_action(kind);
if (a_o == NULL) {
- printk("failed to find %s\n", kind);
return 0;
}
@@ -835,7 +1038,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
a.ops = a_o;
if (a_o->walk == NULL) {
- printk("tc_dump_action: %s !capable of dumping table\n", kind);
+ printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
goto rtattr_failure;
}
@@ -883,8 +1086,6 @@ static int __init tc_action_init(void)
link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
}
- printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
- "hadi@cyberus.ca)\n");
return 0;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a1e68f78dcc2..6cff56696a81 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -13,7 +13,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -35,48 +34,43 @@
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
-/* use generic hash table */
-#define MY_TAB_SIZE 16
-#define MY_TAB_MASK 15
-
-static u32 idx_gen;
-static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
+#define GACT_TAB_MASK 15
+static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
+static u32 gact_idx_gen;
static DEFINE_RWLOCK(gact_lock);
-/* ovewrride the defaults */
-#define tcf_st tcf_gact
-#define tc_st tc_gact
-#define tcf_t_lock gact_lock
-#define tcf_ht tcf_gact_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo gact_hash_info = {
+ .htab = tcf_gact_ht,
+ .hmask = GACT_TAB_MASK,
+ .lock = &gact_lock,
+};
#ifdef CONFIG_GACT_PROB
-static int gact_net_rand(struct tcf_gact *p)
+static int gact_net_rand(struct tcf_gact *gact)
{
- if (net_random()%p->pval)
- return p->action;
- return p->paction;
+ if (net_random() % gact->tcfg_pval)
+ return gact->tcf_action;
+ return gact->tcfg_paction;
}
-static int gact_determ(struct tcf_gact *p)
+static int gact_determ(struct tcf_gact *gact)
{
- if (p->bstats.packets%p->pval)
- return p->action;
- return p->paction;
+ if (gact->tcf_bstats.packets % gact->tcfg_pval)
+ return gact->tcf_action;
+ return gact->tcfg_paction;
}
-typedef int (*g_rand)(struct tcf_gact *p);
+typedef int (*g_rand)(struct tcf_gact *gact);
static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
-#endif
+#endif /* CONFIG_GACT_PROB */
static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
struct tc_action *a, int ovr, int bind)
{
struct rtattr *tb[TCA_GACT_MAX];
struct tc_gact *parm;
- struct tcf_gact *p;
+ struct tcf_gact *gact;
+ struct tcf_common *pc;
int ret = 0;
if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
@@ -95,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
return -EOPNOTSUPP;
#endif
- p = tcf_hash_check(parm->index, a, ovr, bind);
- if (p == NULL) {
- p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
- if (p == NULL)
+ pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, &gact_idx_gen, &gact_hash_info);
+ if (unlikely(!pc))
return -ENOMEM;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_hash_release(p, bind);
+ tcf_hash_release(pc, bind, &gact_hash_info);
return -EEXIST;
}
}
- spin_lock_bh(&p->lock);
- p->action = parm->action;
+ gact = to_gact(pc);
+
+ spin_lock_bh(&gact->tcf_lock);
+ gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
if (tb[TCA_GACT_PROB-1] != NULL) {
struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
- p->paction = p_parm->paction;
- p->pval = p_parm->pval;
- p->ptype = p_parm->ptype;
+ gact->tcfg_paction = p_parm->paction;
+ gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_ptype = p_parm->ptype;
}
#endif
- spin_unlock_bh(&p->lock);
+ spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(p);
+ tcf_hash_insert(pc, &gact_hash_info);
return ret;
}
-static int
-tcf_gact_cleanup(struct tc_action *a, int bind)
+static int tcf_gact_cleanup(struct tc_action *a, int bind)
{
- struct tcf_gact *p = PRIV(a, gact);
+ struct tcf_gact *gact = a->priv;
- if (p != NULL)
- return tcf_hash_release(p, bind);
+ if (gact)
+ return tcf_hash_release(&gact->common, bind, &gact_hash_info);
return 0;
}
-static int
-tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
{
- struct tcf_gact *p = PRIV(a, gact);
+ struct tcf_gact *gact = a->priv;
int action = TC_ACT_SHOT;
- spin_lock(&p->lock);
+ spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (p->ptype && gact_rand[p->ptype] != NULL)
- action = gact_rand[p->ptype](p);
+ if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+ action = gact_rand[gact->tcfg_ptype](gact);
else
- action = p->action;
+ action = gact->tcf_action;
#else
- action = p->action;
+ action = gact->tcf_action;
#endif
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
+ gact->tcf_bstats.bytes += skb->len;
+ gact->tcf_bstats.packets++;
if (action == TC_ACT_SHOT)
- p->qstats.drops++;
- p->tm.lastuse = jiffies;
- spin_unlock(&p->lock);
+ gact->tcf_qstats.drops++;
+ gact->tcf_tm.lastuse = jiffies;
+ spin_unlock(&gact->tcf_lock);
return action;
}
-static int
-tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
struct tc_gact opt;
- struct tcf_gact *p = PRIV(a, gact);
+ struct tcf_gact *gact = a->priv;
struct tcf_t t;
- opt.index = p->index;
- opt.refcnt = p->refcnt - ref;
- opt.bindcnt = p->bindcnt - bind;
- opt.action = p->action;
+ opt.index = gact->tcf_index;
+ opt.refcnt = gact->tcf_refcnt - ref;
+ opt.bindcnt = gact->tcf_bindcnt - bind;
+ opt.action = gact->tcf_action;
RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
#ifdef CONFIG_GACT_PROB
- if (p->ptype) {
+ if (gact->tcfg_ptype) {
struct tc_gact_p p_opt;
- p_opt.paction = p->paction;
- p_opt.pval = p->pval;
- p_opt.ptype = p->ptype;
+ p_opt.paction = gact->tcfg_paction;
+ p_opt.pval = gact->tcfg_pval;
+ p_opt.ptype = gact->tcfg_ptype;
RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
}
#endif
- t.install = jiffies_to_clock_t(jiffies - p->tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tm.expires);
+ t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
return skb->len;
- rtattr_failure:
+rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
+ .hinfo = &gact_hash_info,
.type = TCA_ACT_GACT,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
@@ -209,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
MODULE_DESCRIPTION("Generic Classifier actions");
MODULE_LICENSE("GPL");
-static int __init
-gact_init_module(void)
+static int __init gact_init_module(void)
{
#ifdef CONFIG_GACT_PROB
printk("GACT probability on\n");
@@ -220,8 +214,7 @@ gact_init_module(void)
return tcf_register_action(&act_gact_ops);
}
-static void __exit
-gact_cleanup_module(void)
+static void __exit gact_cleanup_module(void)
{
tcf_unregister_action(&act_gact_ops);
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 37640c6fc014..d8c9310da6e5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -14,7 +14,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -39,25 +38,19 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-/* use generic hash table */
-#define MY_TAB_SIZE 16
-#define MY_TAB_MASK 15
-static u32 idx_gen;
-static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
-/* ipt hash table lock */
+#define IPT_TAB_MASK 15
+static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
+static u32 ipt_idx_gen;
static DEFINE_RWLOCK(ipt_lock);
-/* ovewrride the defaults */
-#define tcf_st tcf_ipt
-#define tcf_t_lock ipt_lock
-#define tcf_ht tcf_ipt_ht
-
-#define CONFIG_NET_ACT_INIT
-#include <net/pkt_act.h>
+static struct tcf_hashinfo ipt_hash_info = {
+ .htab = tcf_ipt_ht,
+ .hmask = IPT_TAB_MASK,
+ .lock = &ipt_lock,
+};
-static int
-ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
{
struct ipt_target *target;
int ret = 0;
@@ -66,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
if (!target)
return -ENOENT;
- DPRINTK("ipt_init_target: found %s\n", target->name);
t->u.kernel.target = target;
ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
@@ -77,10 +69,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(table, NULL,
t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t),
hook)) {
- DPRINTK("ipt_init_target: check failed for `%s'.\n",
- t->u.kernel.target->name);
module_put(t->u.kernel.target->me);
ret = -EINVAL;
}
@@ -88,40 +77,37 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
return ret;
}
-static void
-ipt_destroy_target(struct ipt_entry_target *t)
+static void ipt_destroy_target(struct ipt_entry_target *t)
{
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
}
-static int
-tcf_ipt_release(struct tcf_ipt *p, int bind)
+static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
{
int ret = 0;
- if (p) {
+ if (ipt) {
if (bind)
- p->bindcnt--;
- p->refcnt--;
- if (p->bindcnt <= 0 && p->refcnt <= 0) {
- ipt_destroy_target(p->t);
- kfree(p->tname);
- kfree(p->t);
- tcf_hash_destroy(p);
+ ipt->tcf_bindcnt--;
+ ipt->tcf_refcnt--;
+ if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
+ tcf_hash_destroy(&ipt->common, &ipt_hash_info);
ret = ACT_P_DELETED;
}
}
return ret;
}
-static int
-tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
- int ovr, int bind)
+static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
{
struct rtattr *tb[TCA_IPT_MAX];
- struct tcf_ipt *p;
+ struct tcf_ipt *ipt;
+ struct tcf_common *pc;
struct ipt_entry_target *td, *t;
char *tname;
int ret = 0, err;
@@ -145,49 +131,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
- p = tcf_hash_check(index, a, ovr, bind);
- if (p == NULL) {
- p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
- if (p == NULL)
+ pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
+ &ipt_idx_gen, &ipt_hash_info);
+ if (unlikely(!pc))
return -ENOMEM;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_ipt_release(p, bind);
+ tcf_ipt_release(to_ipt(pc), bind);
return -EEXIST;
}
}
+ ipt = to_ipt(pc);
hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
err = -ENOMEM;
tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
- if (tname == NULL)
+ if (unlikely(!tname))
goto err1;
if (tb[TCA_IPT_TABLE - 1] == NULL ||
rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
strcpy(tname, "mangle");
t = kmalloc(td->u.target_size, GFP_KERNEL);
- if (t == NULL)
+ if (unlikely(!t))
goto err2;
memcpy(t, td, td->u.target_size);
if ((err = ipt_init_target(t, tname, hook)) < 0)
goto err3;
- spin_lock_bh(&p->lock);
+ spin_lock_bh(&ipt->tcf_lock);
if (ret != ACT_P_CREATED) {
- ipt_destroy_target(p->t);
- kfree(p->tname);
- kfree(p->t);
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
}
- p->tname = tname;
- p->t = t;
- p->hook = hook;
- spin_unlock_bh(&p->lock);
+ ipt->tcfi_tname = tname;
+ ipt->tcfi_t = t;
+ ipt->tcfi_hook = hook;
+ spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(p);
+ tcf_hash_insert(pc, &ipt_hash_info);
return ret;
err3:
@@ -195,33 +183,32 @@ err3:
err2:
kfree(tname);
err1:
- kfree(p);
+ kfree(pc);
return err;
}
-static int
-tcf_ipt_cleanup(struct tc_action *a, int bind)
+static int tcf_ipt_cleanup(struct tc_action *a, int bind)
{
- struct tcf_ipt *p = PRIV(a, ipt);
- return tcf_ipt_release(p, bind);
+ struct tcf_ipt *ipt = a->priv;
+ return tcf_ipt_release(ipt, bind);
}
-static int
-tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
{
int ret = 0, result = 0;
- struct tcf_ipt *p = PRIV(a, ipt);
+ struct tcf_ipt *ipt = a->priv;
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return TC_ACT_UNSPEC;
}
- spin_lock(&p->lock);
+ spin_lock(&ipt->tcf_lock);
- p->tm.lastuse = jiffies;
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
+ ipt->tcf_tm.lastuse = jiffies;
+ ipt->tcf_bstats.bytes += skb->len;
+ ipt->tcf_bstats.packets++;
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
@@ -230,16 +217,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
/* iptables targets take a double skb pointer in case the skb
* needs to be replaced. We don't own the skb, so this must not
* happen. The pskb_expand_head above should make sure of this */
- ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook,
- p->t->u.kernel.target, p->t->data,
- NULL);
+ ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+ ipt->tcfi_hook,
+ ipt->tcfi_t->u.kernel.target,
+ ipt->tcfi_t->data);
switch (ret) {
case NF_ACCEPT:
result = TC_ACT_OK;
break;
case NF_DROP:
result = TC_ACT_SHOT;
- p->qstats.drops++;
+ ipt->tcf_qstats.drops++;
break;
case IPT_CONTINUE:
result = TC_ACT_PIPE;
@@ -250,53 +238,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
result = TC_POLICE_OK;
break;
}
- spin_unlock(&p->lock);
+ spin_unlock(&ipt->tcf_lock);
return result;
}
-static int
-tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
+ unsigned char *b = skb->tail;
+ struct tcf_ipt *ipt = a->priv;
struct ipt_entry_target *t;
struct tcf_t tm;
struct tc_cnt c;
- unsigned char *b = skb->tail;
- struct tcf_ipt *p = PRIV(a, ipt);
/* for simple targets kernel size == user size
** user name = target name
** for foolproof you need to not assume this
*/
- t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
- if (t == NULL)
+ t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
+ if (unlikely(!t))
goto rtattr_failure;
- c.bindcnt = p->bindcnt - bind;
- c.refcnt = p->refcnt - ref;
- memcpy(t, p->t, p->t->u.user.target_size);
- strcpy(t->u.user.name, p->t->u.kernel.target->name);
-
- DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
- strlen(p->tname));
- DPRINTK("\tdump target name %s size %d size user %d "
- "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
- p->t->u.target_size, p->t->u.user.target_size,
- p->t->data[0], p->t->data[1]);
- RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
- RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
- RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
+ c.bindcnt = ipt->tcf_bindcnt - bind;
+ c.refcnt = ipt->tcf_refcnt - ref;
+ memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
+ strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
+
+ RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
+ RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
+ RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
- RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname);
- tm.install = jiffies_to_clock_t(jiffies - p->tm.install);
- tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
- tm.expires = jiffies_to_clock_t(p->tm.expires);
+ RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
+ tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
+ tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
+ tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
kfree(t);
return skb->len;
- rtattr_failure:
+rtattr_failure:
skb_trim(skb, b - skb->data);
kfree(t);
return -1;
@@ -304,6 +285,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
+ .hinfo = &ipt_hash_info,
.type = TCA_ACT_IPT,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
@@ -319,14 +301,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
MODULE_DESCRIPTION("Iptables target actions");
MODULE_LICENSE("GPL");
-static int __init
-ipt_init_module(void)
+static int __init ipt_init_module(void)
{
return tcf_register_action(&act_ipt_ops);
}
-static void __exit
-ipt_cleanup_module(void)
+static void __exit ipt_cleanup_module(void)
{
tcf_unregister_action(&act_ipt_ops);
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 4fcccbd50885..483897271f15 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -15,7 +15,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -40,46 +39,39 @@
#include <linux/etherdevice.h>
#include <linux/if_arp.h>
-
-/* use generic hash table */
-#define MY_TAB_SIZE 8
-#define MY_TAB_MASK (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
+#define MIRRED_TAB_MASK 7
+static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
+static u32 mirred_idx_gen;
static DEFINE_RWLOCK(mirred_lock);
-/* ovewrride the defaults */
-#define tcf_st tcf_mirred
-#define tc_st tc_mirred
-#define tcf_t_lock mirred_lock
-#define tcf_ht tcf_mirred_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo mirred_hash_info = {
+ .htab = tcf_mirred_ht,
+ .hmask = MIRRED_TAB_MASK,
+ .lock = &mirred_lock,
+};
-static inline int
-tcf_mirred_release(struct tcf_mirred *p, int bind)
+static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
{
- if (p) {
+ if (m) {
if (bind)
- p->bindcnt--;
- p->refcnt--;
- if(!p->bindcnt && p->refcnt <= 0) {
- dev_put(p->dev);
- tcf_hash_destroy(p);
+ m->tcf_bindcnt--;
+ m->tcf_refcnt--;
+ if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+ dev_put(m->tcfm_dev);
+ tcf_hash_destroy(&m->common, &mirred_hash_info);
return 1;
}
}
return 0;
}
-static int
-tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
- int ovr, int bind)
+static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
{
struct rtattr *tb[TCA_MIRRED_MAX];
struct tc_mirred *parm;
- struct tcf_mirred *p;
+ struct tcf_mirred *m;
+ struct tcf_common *pc;
struct net_device *dev = NULL;
int ret = 0;
int ok_push = 0;
@@ -111,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
}
}
- p = tcf_hash_check(parm->index, a, ovr, bind);
- if (p == NULL) {
+ pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
+ if (!pc) {
if (!parm->ifindex)
return -EINVAL;
- p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
- if (p == NULL)
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
+ &mirred_idx_gen, &mirred_hash_info);
+ if (unlikely(!pc))
return -ENOMEM;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_mirred_release(p, bind);
+ tcf_mirred_release(to_mirred(pc), bind);
return -EEXIST;
}
}
+ m = to_mirred(pc);
- spin_lock_bh(&p->lock);
- p->action = parm->action;
- p->eaction = parm->eaction;
+ spin_lock_bh(&m->tcf_lock);
+ m->tcf_action = parm->action;
+ m->tcfm_eaction = parm->eaction;
if (parm->ifindex) {
- p->ifindex = parm->ifindex;
+ m->tcfm_ifindex = parm->ifindex;
if (ret != ACT_P_CREATED)
- dev_put(p->dev);
- p->dev = dev;
+ dev_put(m->tcfm_dev);
+ m->tcfm_dev = dev;
dev_hold(dev);
- p->ok_push = ok_push;
+ m->tcfm_ok_push = ok_push;
}
- spin_unlock_bh(&p->lock);
+ spin_unlock_bh(&m->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(p);
+ tcf_hash_insert(pc, &mirred_hash_info);
- DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
- "ifindex %d\n", parm->index, parm->action, parm->eaction,
- dev->name, parm->ifindex);
return ret;
}
-static int
-tcf_mirred_cleanup(struct tc_action *a, int bind)
+static int tcf_mirred_cleanup(struct tc_action *a, int bind)
{
- struct tcf_mirred *p = PRIV(a, mirred);
+ struct tcf_mirred *m = a->priv;
- if (p != NULL)
- return tcf_mirred_release(p, bind);
+ if (m)
+ return tcf_mirred_release(m, bind);
return 0;
}
-static int
-tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
{
- struct tcf_mirred *p = PRIV(a, mirred);
+ struct tcf_mirred *m = a->priv;
struct net_device *dev;
struct sk_buff *skb2 = NULL;
u32 at = G_TC_AT(skb->tc_verd);
- spin_lock(&p->lock);
+ spin_lock(&m->tcf_lock);
- dev = p->dev;
- p->tm.lastuse = jiffies;
+ dev = m->tcfm_dev;
+ m->tcf_tm.lastuse = jiffies;
if (!(dev->flags&IFF_UP) ) {
if (net_ratelimit())
@@ -177,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
bad_mirred:
if (skb2 != NULL)
kfree_skb(skb2);
- p->qstats.overlimits++;
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
- spin_unlock(&p->lock);
+ m->tcf_qstats.overlimits++;
+ m->tcf_bstats.bytes += skb->len;
+ m->tcf_bstats.packets++;
+ spin_unlock(&m->tcf_lock);
/* should we be asking for packet to be dropped?
* may make sense for redirect case only
*/
@@ -190,59 +180,59 @@ bad_mirred:
skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 == NULL)
goto bad_mirred;
- if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) {
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR &&
+ m->tcfm_eaction != TCA_EGRESS_REDIR) {
if (net_ratelimit())
- printk("tcf_mirred unknown action %d\n", p->eaction);
+ printk("tcf_mirred unknown action %d\n",
+ m->tcfm_eaction);
goto bad_mirred;
}
- p->bstats.bytes += skb2->len;
- p->bstats.packets++;
+ m->tcf_bstats.bytes += skb2->len;
+ m->tcf_bstats.packets++;
if (!(at & AT_EGRESS))
- if (p->ok_push)
+ if (m->tcfm_ok_push)
skb_push(skb2, skb2->dev->hard_header_len);
/* mirror is always swallowed */
- if (p->eaction != TCA_EGRESS_MIRROR)
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
skb2->dev = dev;
skb2->input_dev = skb->dev;
dev_queue_xmit(skb2);
- spin_unlock(&p->lock);
- return p->action;
+ spin_unlock(&m->tcf_lock);
+ return m->tcf_action;
}
-static int
-tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
+ struct tcf_mirred *m = a->priv;
struct tc_mirred opt;
- struct tcf_mirred *p = PRIV(a, mirred);
struct tcf_t t;
- opt.index = p->index;
- opt.action = p->action;
- opt.refcnt = p->refcnt - ref;
- opt.bindcnt = p->bindcnt - bind;
- opt.eaction = p->eaction;
- opt.ifindex = p->ifindex;
- DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
- p->index, p->action, p->eaction, p->ifindex);
+ opt.index = m->tcf_index;
+ opt.action = m->tcf_action;
+ opt.refcnt = m->tcf_refcnt - ref;
+ opt.bindcnt = m->tcf_bindcnt - bind;
+ opt.eaction = m->tcfm_eaction;
+ opt.ifindex = m->tcfm_ifindex;
RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
- t.install = jiffies_to_clock_t(jiffies - p->tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tm.expires);
+ t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
return skb->len;
- rtattr_failure:
+rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
+ .hinfo = &mirred_hash_info,
.type = TCA_ACT_MIRRED,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
@@ -258,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)");
MODULE_DESCRIPTION("Device Mirror/redirect actions");
MODULE_LICENSE("GPL");
-static int __init
-mirred_init_module(void)
+static int __init mirred_init_module(void)
{
printk("Mirror/redirect action on\n");
return tcf_register_action(&act_mirred_ops);
}
-static void __exit
-mirred_cleanup_module(void)
+static void __exit mirred_cleanup_module(void)
{
tcf_unregister_action(&act_mirred_ops);
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 1742a68e0122..8ac65c219b98 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -12,7 +12,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -34,32 +33,25 @@
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
-
-#define PEDIT_DEB 1
-
-/* use generic hash table */
-#define MY_TAB_SIZE 16
-#define MY_TAB_MASK 15
-static u32 idx_gen;
-static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
+#define PEDIT_TAB_MASK 15
+static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
+static u32 pedit_idx_gen;
static DEFINE_RWLOCK(pedit_lock);
-#define tcf_st tcf_pedit
-#define tc_st tc_pedit
-#define tcf_t_lock pedit_lock
-#define tcf_ht tcf_pedit_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo pedit_hash_info = {
+ .htab = tcf_pedit_ht,
+ .hmask = PEDIT_TAB_MASK,
+ .lock = &pedit_lock,
+};
-static int
-tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
- int ovr, int bind)
+static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
{
struct rtattr *tb[TCA_PEDIT_MAX];
struct tc_pedit *parm;
int ret = 0;
struct tcf_pedit *p;
+ struct tcf_common *pc;
struct tc_pedit_key *keys = NULL;
int ksize;
@@ -74,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
return -EINVAL;
- p = tcf_hash_check(parm->index, a, ovr, bind);
- if (p == NULL) {
+ pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
+ if (!pc) {
if (!parm->nkeys)
return -EINVAL;
- p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
- if (p == NULL)
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+ &pedit_idx_gen, &pedit_hash_info);
+ if (unlikely(!pc))
return -ENOMEM;
+ p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- kfree(p);
+ kfree(pc);
return -ENOMEM;
}
ret = ACT_P_CREATED;
} else {
+ p = to_pedit(pc);
if (!ovr) {
- tcf_hash_release(p, bind);
+ tcf_hash_release(pc, bind, &pedit_hash_info);
return -EEXIST;
}
- if (p->nkeys && p->nkeys != parm->nkeys) {
+ if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
return -ENOMEM;
}
}
- spin_lock_bh(&p->lock);
- p->flags = parm->flags;
- p->action = parm->action;
+ spin_lock_bh(&p->tcf_lock);
+ p->tcfp_flags = parm->flags;
+ p->tcf_action = parm->action;
if (keys) {
- kfree(p->keys);
- p->keys = keys;
- p->nkeys = parm->nkeys;
+ kfree(p->tcfp_keys);
+ p->tcfp_keys = keys;
+ p->tcfp_nkeys = parm->nkeys;
}
- memcpy(p->keys, parm->keys, ksize);
- spin_unlock_bh(&p->lock);
+ memcpy(p->tcfp_keys, parm->keys, ksize);
+ spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(p);
+ tcf_hash_insert(pc, &pedit_hash_info);
return ret;
}
-static int
-tcf_pedit_cleanup(struct tc_action *a, int bind)
+static int tcf_pedit_cleanup(struct tc_action *a, int bind)
{
- struct tcf_pedit *p = PRIV(a, pedit);
+ struct tcf_pedit *p = a->priv;
- if (p != NULL) {
- struct tc_pedit_key *keys = p->keys;
- if (tcf_hash_release(p, bind)) {
+ if (p) {
+ struct tc_pedit_key *keys = p->tcfp_keys;
+ if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
kfree(keys);
return 1;
}
@@ -129,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
return 0;
}
-static int
-tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
{
- struct tcf_pedit *p = PRIV(a, pedit);
+ struct tcf_pedit *p = a->priv;
int i, munged = 0;
u8 *pptr;
if (!(skb->tc_verd & TC_OK2MUNGE)) {
/* should we set skb->cloned? */
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
- return p->action;
+ return p->tcf_action;
}
}
pptr = skb->nh.raw;
- spin_lock(&p->lock);
+ spin_lock(&p->tcf_lock);
- p->tm.lastuse = jiffies;
+ p->tcf_tm.lastuse = jiffies;
- if (p->nkeys > 0) {
- struct tc_pedit_key *tkey = p->keys;
+ if (p->tcfp_nkeys > 0) {
+ struct tc_pedit_key *tkey = p->tcfp_keys;
- for (i = p->nkeys; i > 0; i--, tkey++) {
+ for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
u32 *ptr;
int offset = tkey->off;
@@ -170,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
printk("offset must be on 32 bit boundaries\n");
goto bad;
}
- if (skb->len < 0 || (offset > 0 && offset > skb->len)) {
+ if (skb->len < 0 ||
+ (offset > 0 && offset > skb->len)) {
printk("offset %d cant exceed pkt length %d\n",
offset, skb->len);
goto bad;
@@ -186,64 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
goto done;
} else {
- printk("pedit BUG: index %d\n",p->index);
+ printk("pedit BUG: index %d\n", p->tcf_index);
}
bad:
- p->qstats.overlimits++;
+ p->tcf_qstats.overlimits++;
done:
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
- spin_unlock(&p->lock);
- return p->action;
+ p->tcf_bstats.bytes += skb->len;
+ p->tcf_bstats.packets++;
+ spin_unlock(&p->tcf_lock);
+ return p->tcf_action;
}
-static int
-tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
+static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb->tail;
+ struct tcf_pedit *p = a->priv;
struct tc_pedit *opt;
- struct tcf_pedit *p = PRIV(a, pedit);
struct tcf_t t;
int s;
- s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
+ s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
/* netlink spinlocks held above us - must use ATOMIC */
- opt = kmalloc(s, GFP_ATOMIC);
- if (opt == NULL)
+ opt = kzalloc(s, GFP_ATOMIC);
+ if (unlikely(!opt))
return -ENOBUFS;
- memset(opt, 0, s);
-
- memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
- opt->index = p->index;
- opt->nkeys = p->nkeys;
- opt->flags = p->flags;
- opt->action = p->action;
- opt->refcnt = p->refcnt - ref;
- opt->bindcnt = p->bindcnt - bind;
-
-
-#ifdef PEDIT_DEB
- {
- /* Debug - get rid of later */
- int i;
- struct tc_pedit_key *key = opt->keys;
-
- for (i=0; i<opt->nkeys; i++, key++) {
- printk( "\n key #%d",i);
- printk( " at %d: val %08x mask %08x",
- (unsigned int)key->off,
- (unsigned int)key->val,
- (unsigned int)key->mask);
- }
- }
-#endif
+
+ memcpy(opt->keys, p->tcfp_keys,
+ p->tcfp_nkeys * sizeof(struct tc_pedit_key));
+ opt->index = p->tcf_index;
+ opt->nkeys = p->tcfp_nkeys;
+ opt->flags = p->tcfp_flags;
+ opt->action = p->tcf_action;
+ opt->refcnt = p->tcf_refcnt - ref;
+ opt->bindcnt = p->tcf_bindcnt - bind;
RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
- t.install = jiffies_to_clock_t(jiffies - p->tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tm.expires);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
kfree(opt);
return skb->len;
@@ -254,9 +232,9 @@ rtattr_failure:
return -1;
}
-static
-struct tc_action_ops act_pedit_ops = {
+static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
+ .hinfo = &pedit_hash_info,
.type = TCA_ACT_PEDIT,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
@@ -272,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
MODULE_DESCRIPTION("Generic Packet Editor actions");
MODULE_LICENSE("GPL");
-static int __init
-pedit_init_module(void)
+static int __init pedit_init_module(void)
{
return tcf_register_action(&act_pedit_ops);
}
-static void __exit
-pedit_cleanup_module(void)
+static void __exit pedit_cleanup_module(void)
{
tcf_unregister_action(&act_pedit_ops);
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 24c348fa8922..fed47b658837 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -13,7 +13,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -33,43 +32,27 @@
#include <net/sock.h>
#include <net/act_api.h>
-#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
-#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
-#define PRIV(a) ((struct tcf_police *) (a)->priv)
+#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
-/* use generic hash table */
-#define MY_TAB_SIZE 16
-#define MY_TAB_MASK 15
-static u32 idx_gen;
-static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
-/* Policer hash table lock */
+#define POL_TAB_MASK 15
+static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
+static u32 police_idx_gen;
static DEFINE_RWLOCK(police_lock);
-/* Each policer is serialized by its individual spinlock */
-
-static __inline__ unsigned tcf_police_hash(u32 index)
-{
- return index&0xF;
-}
-
-static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
-{
- struct tcf_police *p;
+static struct tcf_hashinfo police_hash_info = {
+ .htab = tcf_police_ht,
+ .hmask = POL_TAB_MASK,
+ .lock = &police_lock,
+};
- read_lock(&police_lock);
- for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
- if (p->index == index)
- break;
- }
- read_unlock(&police_lock);
- return p;
-}
+/* Each policer is serialized by its individual spinlock */
#ifdef CONFIG_NET_CLS_ACT
static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
int type, struct tc_action *a)
{
- struct tcf_police *p;
+ struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct rtattr *r;
@@ -77,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
s_i = cb->args[0];
- for (i = 0; i < MY_TAB_SIZE; i++) {
- p = tcf_police_ht[tcf_police_hash(i)];
+ for (i = 0; i < (POL_TAB_MASK + 1); i++) {
+ p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
- for (; p; p = p->next) {
+ for (; p; p = p->tcfc_next) {
index++;
if (index < s_i)
continue;
@@ -111,48 +94,26 @@ rtattr_failure:
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
-
-static inline int
-tcf_act_police_hash_search(struct tc_action *a, u32 index)
-{
- struct tcf_police *p = tcf_police_lookup(index);
-
- if (p != NULL) {
- a->priv = p;
- return 1;
- } else {
- return 0;
- }
-}
#endif
-static inline u32 tcf_police_new_index(void)
-{
- do {
- if (++idx_gen == 0)
- idx_gen = 1;
- } while (tcf_police_lookup(idx_gen));
-
- return idx_gen;
-}
-
void tcf_police_destroy(struct tcf_police *p)
{
- unsigned h = tcf_police_hash(p->index);
- struct tcf_police **p1p;
+ unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
+ struct tcf_common **p1p;
- for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
- if (*p1p == p) {
+ for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+ if (*p1p == &p->common) {
write_lock_bh(&police_lock);
- *p1p = p->next;
+ *p1p = p->tcf_next;
write_unlock_bh(&police_lock);
#ifdef CONFIG_NET_ESTIMATOR
- gen_kill_estimator(&p->bstats, &p->rate_est);
+ gen_kill_estimator(&p->tcf_bstats,
+ &p->tcf_rate_est);
#endif
- if (p->R_tab)
- qdisc_put_rtab(p->R_tab);
- if (p->P_tab)
- qdisc_put_rtab(p->P_tab);
+ if (p->tcfp_R_tab)
+ qdisc_put_rtab(p->tcfp_R_tab);
+ if (p->tcfp_P_tab)
+ qdisc_put_rtab(p->tcfp_P_tab);
kfree(p);
return;
}
@@ -168,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
int ret = 0, err;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
- struct tcf_police *p;
+ struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
@@ -186,28 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
return -EINVAL;
- if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
- a->priv = p;
- if (bind) {
- p->bindcnt += 1;
- p->refcnt += 1;
+ if (parm->index) {
+ struct tcf_common *pc;
+
+ pc = tcf_hash_lookup(parm->index, &police_hash_info);
+ if (pc != NULL) {
+ a->priv = pc;
+ police = to_police(pc);
+ if (bind) {
+ police->tcf_bindcnt += 1;
+ police->tcf_refcnt += 1;
+ }
+ if (ovr)
+ goto override;
+ return ret;
}
- if (ovr)
- goto override;
- return ret;
}
- p = kmalloc(sizeof(*p), GFP_KERNEL);
- if (p == NULL)
+ police = kzalloc(sizeof(*police), GFP_KERNEL);
+ if (police == NULL)
return -ENOMEM;
- memset(p, 0, sizeof(*p));
-
ret = ACT_P_CREATED;
- p->refcnt = 1;
- spin_lock_init(&p->lock);
- p->stats_lock = &p->lock;
+ police->tcf_refcnt = 1;
+ spin_lock_init(&police->tcf_lock);
+ police->tcf_stats_lock = &police->tcf_lock;
if (bind)
- p->bindcnt = 1;
+ police->tcf_bindcnt = 1;
override:
if (parm->rate.rate) {
err = -ENOMEM;
@@ -217,67 +182,71 @@ override:
if (parm->peakrate.rate) {
P_tab = qdisc_get_rtab(&parm->peakrate,
tb[TCA_POLICE_PEAKRATE-1]);
- if (p->P_tab == NULL) {
+ if (P_tab == NULL) {
qdisc_put_rtab(R_tab);
goto failure;
}
}
}
/* No failure allowed after this point */
- spin_lock_bh(&p->lock);
+ spin_lock_bh(&police->tcf_lock);
if (R_tab != NULL) {
- qdisc_put_rtab(p->R_tab);
- p->R_tab = R_tab;
+ qdisc_put_rtab(police->tcfp_R_tab);
+ police->tcfp_R_tab = R_tab;
}
if (P_tab != NULL) {
- qdisc_put_rtab(p->P_tab);
- p->P_tab = P_tab;
+ qdisc_put_rtab(police->tcfp_P_tab);
+ police->tcfp_P_tab = P_tab;
}
if (tb[TCA_POLICE_RESULT-1])
- p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
- p->toks = p->burst = parm->burst;
- p->mtu = parm->mtu;
- if (p->mtu == 0) {
- p->mtu = ~0;
- if (p->R_tab)
- p->mtu = 255<<p->R_tab->rate.cell_log;
+ police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+ police->tcfp_toks = police->tcfp_burst = parm->burst;
+ police->tcfp_mtu = parm->mtu;
+ if (police->tcfp_mtu == 0) {
+ police->tcfp_mtu = ~0;
+ if (police->tcfp_R_tab)
+ police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
}
- if (p->P_tab)
- p->ptoks = L2T_P(p, p->mtu);
- p->action = parm->action;
+ if (police->tcfp_P_tab)
+ police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+ police->tcf_action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
if (tb[TCA_POLICE_AVRATE-1])
- p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+ police->tcfp_ewma_rate =
+ *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
if (est)
- gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+ gen_replace_estimator(&police->tcf_bstats,
+ &police->tcf_rate_est,
+ police->tcf_stats_lock, est);
#endif
- spin_unlock_bh(&p->lock);
+ spin_unlock_bh(&police->tcf_lock);
if (ret != ACT_P_CREATED)
return ret;
- PSCHED_GET_TIME(p->t_c);
- p->index = parm->index ? : tcf_police_new_index();
- h = tcf_police_hash(p->index);
+ PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcf_index = parm->index ? parm->index :
+ tcf_hash_new_index(&police_idx_gen, &police_hash_info);
+ h = tcf_hash(police->tcf_index, POL_TAB_MASK);
write_lock_bh(&police_lock);
- p->next = tcf_police_ht[h];
- tcf_police_ht[h] = p;
+ police->tcf_next = tcf_police_ht[h];
+ tcf_police_ht[h] = &police->common;
write_unlock_bh(&police_lock);
- a->priv = p;
+ a->priv = police;
return ret;
failure:
if (ret == ACT_P_CREATED)
- kfree(p);
+ kfree(police);
return err;
}
static int tcf_act_police_cleanup(struct tc_action *a, int bind)
{
- struct tcf_police *p = PRIV(a);
+ struct tcf_police *p = a->priv;
if (p != NULL)
return tcf_police_release(p, bind);
@@ -287,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
struct tcf_result *res)
{
+ struct tcf_police *police = a->priv;
psched_time_t now;
- struct tcf_police *p = PRIV(a);
long toks;
long ptoks = 0;
- spin_lock(&p->lock);
+ spin_lock(&police->tcf_lock);
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
+ police->tcf_bstats.bytes += skb->len;
+ police->tcf_bstats.packets++;
#ifdef CONFIG_NET_ESTIMATOR
- if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
- p->qstats.overlimits++;
- spin_unlock(&p->lock);
- return p->action;
+ if (police->tcfp_ewma_rate &&
+ police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
}
#endif
- if (skb->len <= p->mtu) {
- if (p->R_tab == NULL) {
- spin_unlock(&p->lock);
- return p->result;
+ if (skb->len <= police->tcfp_mtu) {
+ if (police->tcfp_R_tab == NULL) {
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
}
PSCHED_GET_TIME(now);
- toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
- if (p->P_tab) {
- ptoks = toks + p->ptoks;
- if (ptoks > (long)L2T_P(p, p->mtu))
- ptoks = (long)L2T_P(p, p->mtu);
- ptoks -= L2T_P(p, skb->len);
+ toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+ police->tcfp_burst);
+ if (police->tcfp_P_tab) {
+ ptoks = toks + police->tcfp_ptoks;
+ if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+ ptoks = (long)L2T_P(police, police->tcfp_mtu);
+ ptoks -= L2T_P(police, skb->len);
}
- toks += p->toks;
- if (toks > (long)p->burst)
- toks = p->burst;
- toks -= L2T(p, skb->len);
-
+ toks += police->tcfp_toks;
+ if (toks > (long)police->tcfp_burst)
+ toks = police->tcfp_burst;
+ toks -= L2T(police, skb->len);
if ((toks|ptoks) >= 0) {
- p->t_c = now;
- p->toks = toks;
- p->ptoks = ptoks;
- spin_unlock(&p->lock);
- return p->result;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
}
}
- p->qstats.overlimits++;
- spin_unlock(&p->lock);
- return p->action;
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
}
static int
tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
+ struct tcf_police *police = a->priv;
struct tc_police opt;
- struct tcf_police *p = PRIV(a);
-
- opt.index = p->index;
- opt.action = p->action;
- opt.mtu = p->mtu;
- opt.burst = p->burst;
- opt.refcnt = p->refcnt - ref;
- opt.bindcnt = p->bindcnt - bind;
- if (p->R_tab)
- opt.rate = p->R_tab->rate;
+
+ opt.index = police->tcf_index;
+ opt.action = police->tcf_action;
+ opt.mtu = police->tcfp_mtu;
+ opt.burst = police->tcfp_burst;
+ opt.refcnt = police->tcf_refcnt - ref;
+ opt.bindcnt = police->tcf_bindcnt - bind;
+ if (police->tcfp_R_tab)
+ opt.rate = police->tcfp_R_tab->rate;
else
memset(&opt.rate, 0, sizeof(opt.rate));
- if (p->P_tab)
- opt.peakrate = p->P_tab->rate;
+ if (police->tcfp_P_tab)
+ opt.peakrate = police->tcfp_P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
- if (p->result)
- RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+ if (police->tcfp_result)
+ RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+ &police->tcfp_result);
#ifdef CONFIG_NET_ESTIMATOR
- if (p->ewma_rate)
- RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+ if (police->tcfp_ewma_rate)
+ RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
#endif
return skb->len;
@@ -381,13 +351,14 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
+ .hinfo = &police_hash_info,
.type = TCA_ID_POLICE,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
.cleanup = tcf_act_police_cleanup,
- .lookup = tcf_act_police_hash_search,
+ .lookup = tcf_hash_search,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
@@ -409,10 +380,39 @@ module_exit(police_cleanup_module);
#else /* CONFIG_NET_CLS_ACT */
-struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+static struct tcf_common *tcf_police_lookup(u32 index)
{
- unsigned h;
- struct tcf_police *p;
+ struct tcf_hashinfo *hinfo = &police_hash_info;
+ struct tcf_common *p;
+
+ read_lock(hinfo->lock);
+ for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+ p = p->tcfc_next) {
+ if (p->tcfc_index == index)
+ break;
+ }
+ read_unlock(hinfo->lock);
+
+ return p;
+}
+
+static u32 tcf_police_new_index(void)
+{
+ u32 *idx_gen = &police_idx_gen;
+ u32 val = *idx_gen;
+
+ do {
+ if (++val == 0)
+ val = 1;
+ } while (tcf_police_lookup(val));
+
+ return (*idx_gen = val);
+}
+
+struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+ unsigned int h;
+ struct tcf_police *police;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
@@ -425,150 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
- if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
- p->refcnt++;
- return p;
- }
+ if (parm->index) {
+ struct tcf_common *pc;
- p = kmalloc(sizeof(*p), GFP_KERNEL);
- if (p == NULL)
+ pc = tcf_police_lookup(parm->index);
+ if (pc) {
+ police = to_police(pc);
+ police->tcf_refcnt++;
+ return police;
+ }
+ }
+ police = kzalloc(sizeof(*police), GFP_KERNEL);
+ if (unlikely(!police))
return NULL;
- memset(p, 0, sizeof(*p));
- p->refcnt = 1;
- spin_lock_init(&p->lock);
- p->stats_lock = &p->lock;
+ police->tcf_refcnt = 1;
+ spin_lock_init(&police->tcf_lock);
+ police->tcf_stats_lock = &police->tcf_lock;
if (parm->rate.rate) {
- p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
- if (p->R_tab == NULL)
+ police->tcfp_R_tab =
+ qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+ if (police->tcfp_R_tab == NULL)
goto failure;
if (parm->peakrate.rate) {
- p->P_tab = qdisc_get_rtab(&parm->peakrate,
- tb[TCA_POLICE_PEAKRATE-1]);
- if (p->P_tab == NULL)
+ police->tcfp_P_tab =
+ qdisc_get_rtab(&parm->peakrate,
+ tb[TCA_POLICE_PEAKRATE-1]);
+ if (police->tcfp_P_tab == NULL)
goto failure;
}
}
if (tb[TCA_POLICE_RESULT-1]) {
if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
goto failure;
- p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+ police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
}
#ifdef CONFIG_NET_ESTIMATOR
if (tb[TCA_POLICE_AVRATE-1]) {
if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
goto failure;
- p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+ police->tcfp_ewma_rate =
+ *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
}
#endif
- p->toks = p->burst = parm->burst;
- p->mtu = parm->mtu;
- if (p->mtu == 0) {
- p->mtu = ~0;
- if (p->R_tab)
- p->mtu = 255<<p->R_tab->rate.cell_log;
+ police->tcfp_toks = police->tcfp_burst = parm->burst;
+ police->tcfp_mtu = parm->mtu;
+ if (police->tcfp_mtu == 0) {
+ police->tcfp_mtu = ~0;
+ if (police->tcfp_R_tab)
+ police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
}
- if (p->P_tab)
- p->ptoks = L2T_P(p, p->mtu);
- PSCHED_GET_TIME(p->t_c);
- p->index = parm->index ? : tcf_police_new_index();
- p->action = parm->action;
+ if (police->tcfp_P_tab)
+ police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+ PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcf_index = parm->index ? parm->index :
+ tcf_police_new_index();
+ police->tcf_action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
if (est)
- gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+ gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
+ police->tcf_stats_lock, est);
#endif
- h = tcf_police_hash(p->index);
+ h = tcf_hash(police->tcf_index, POL_TAB_MASK);
write_lock_bh(&police_lock);
- p->next = tcf_police_ht[h];
- tcf_police_ht[h] = p;
+ police->tcf_next = tcf_police_ht[h];
+ tcf_police_ht[h] = &police->common;
write_unlock_bh(&police_lock);
- return p;
+ return police;
failure:
- if (p->R_tab)
- qdisc_put_rtab(p->R_tab);
- kfree(p);
+ if (police->tcfp_R_tab)
+ qdisc_put_rtab(police->tcfp_R_tab);
+ kfree(police);
return NULL;
}
-int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police(struct sk_buff *skb, struct tcf_police *police)
{
psched_time_t now;
long toks;
long ptoks = 0;
- spin_lock(&p->lock);
+ spin_lock(&police->tcf_lock);
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
+ police->tcf_bstats.bytes += skb->len;
+ police->tcf_bstats.packets++;
#ifdef CONFIG_NET_ESTIMATOR
- if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
- p->qstats.overlimits++;
- spin_unlock(&p->lock);
- return p->action;
+ if (police->tcfp_ewma_rate &&
+ police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
}
#endif
-
- if (skb->len <= p->mtu) {
- if (p->R_tab == NULL) {
- spin_unlock(&p->lock);
- return p->result;
+ if (skb->len <= police->tcfp_mtu) {
+ if (police->tcfp_R_tab == NULL) {
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
}
PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
- if (p->P_tab) {
- ptoks = toks + p->ptoks;
- if (ptoks > (long)L2T_P(p, p->mtu))
- ptoks = (long)L2T_P(p, p->mtu);
- ptoks -= L2T_P(p, skb->len);
+ toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+ police->tcfp_burst);
+ if (police->tcfp_P_tab) {
+ ptoks = toks + police->tcfp_ptoks;
+ if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+ ptoks = (long)L2T_P(police, police->tcfp_mtu);
+ ptoks -= L2T_P(police, skb->len);
}
- toks += p->toks;
- if (toks > (long)p->burst)
- toks = p->burst;
- toks -= L2T(p, skb->len);
-
+ toks += police->tcfp_toks;
+ if (toks > (long)police->tcfp_burst)
+ toks = police->tcfp_burst;
+ toks -= L2T(police, skb->len);
if ((toks|ptoks) >= 0) {
- p->t_c = now;
- p->toks = toks;
- p->ptoks = ptoks;
- spin_unlock(&p->lock);
- return p->result;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
}
}
- p->qstats.overlimits++;
- spin_unlock(&p->lock);
- return p->action;
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
}
EXPORT_SYMBOL(tcf_police);
-int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb->tail;
struct tc_police opt;
- opt.index = p->index;
- opt.action = p->action;
- opt.mtu = p->mtu;
- opt.burst = p->burst;
- if (p->R_tab)
- opt.rate = p->R_tab->rate;
+ opt.index = police->tcf_index;
+ opt.action = police->tcf_action;
+ opt.mtu = police->tcfp_mtu;
+ opt.burst = police->tcfp_burst;
+ if (police->tcfp_R_tab)
+ opt.rate = police->tcfp_R_tab->rate;
else
memset(&opt.rate, 0, sizeof(opt.rate));
- if (p->P_tab)
- opt.peakrate = p->P_tab->rate;
+ if (police->tcfp_P_tab)
+ opt.peakrate = police->tcfp_P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
- if (p->result)
- RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+ if (police->tcfp_result)
+ RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+ &police->tcfp_result);
#ifdef CONFIG_NET_ESTIMATOR
- if (p->ewma_rate)
- RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+ if (police->tcfp_ewma_rate)
+ RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
#endif
return skb->len;
@@ -577,19 +585,20 @@ rtattr_failure:
return -1;
}
-int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
{
struct gnet_dump d;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
- TCA_XSTATS, p->stats_lock, &d) < 0)
+ TCA_XSTATS, police->tcf_stats_lock,
+ &d) < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
- gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
#endif
- gnet_stats_copy_queue(&d, &p->qstats) < 0)
+ gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e5f2e1f431e2..901571a67707 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -10,7 +10,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -21,54 +20,175 @@
#define TCA_ACT_SIMP 22
-/* XXX: Hide all these common elements under some macro
- * probably
-*/
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
-/* use generic hash table with 8 buckets */
-#define MY_TAB_SIZE 8
-#define MY_TAB_MASK (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE];
+#define SIMP_TAB_MASK 7
+static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
+static u32 simp_idx_gen;
static DEFINE_RWLOCK(simp_lock);
-/* override the defaults */
-#define tcf_st tcf_defact
-#define tc_st tc_defact
-#define tcf_t_lock simp_lock
-#define tcf_ht tcf_simp_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
-#include <net/act_generic.h>
+static struct tcf_hashinfo simp_hash_info = {
+ .htab = tcf_simp_ht,
+ .hmask = SIMP_TAB_MASK,
+ .lock = &simp_lock,
+};
static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
{
- struct tcf_defact *p = PRIV(a, defact);
+ struct tcf_defact *d = a->priv;
- spin_lock(&p->lock);
- p->tm.lastuse = jiffies;
- p->bstats.bytes += skb->len;
- p->bstats.packets++;
+ spin_lock(&d->tcf_lock);
+ d->tcf_tm.lastuse = jiffies;
+ d->tcf_bstats.bytes += skb->len;
+ d->tcf_bstats.packets++;
/* print policy string followed by _ then packet count
* Example if this was the 3rd packet and the string was "hello"
* then it would look like "hello_3" (without quotes)
**/
- printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets);
- spin_unlock(&p->lock);
- return p->action;
+ printk("simple: %s_%d\n",
+ (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+}
+
+static int tcf_simp_release(struct tcf_defact *d, int bind)
+{
+ int ret = 0;
+ if (d) {
+ if (bind)
+ d->tcf_bindcnt--;
+ d->tcf_refcnt--;
+ if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
+ kfree(d->tcfd_defdata);
+ tcf_hash_destroy(&d->common, &simp_hash_info);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+ d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
+ if (unlikely(!d->tcfd_defdata))
+ return -ENOMEM;
+ d->tcfd_datalen = datalen;
+ memcpy(d->tcfd_defdata, defdata, datalen);
+ return 0;
+}
+
+static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+ kfree(d->tcfd_defdata);
+ return alloc_defdata(d, datalen, defdata);
+}
+
+static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct rtattr *tb[TCA_DEF_MAX];
+ struct tc_defact *parm;
+ struct tcf_defact *d;
+ struct tcf_common *pc;
+ void *defdata;
+ u32 datalen = 0;
+ int ret = 0;
+
+ if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
+ return -EINVAL;
+
+ if (tb[TCA_DEF_PARMS - 1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
+ return -EINVAL;
+
+ parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
+ defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
+ if (defdata == NULL)
+ return -EINVAL;
+
+ datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
+ if (datalen <= 0)
+ return -EINVAL;
+
+ pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+ &simp_idx_gen, &simp_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+
+ d = to_defact(pc);
+ ret = alloc_defdata(d, datalen, defdata);
+ if (ret < 0) {
+ kfree(pc);
+ return ret;
+ }
+ ret = ACT_P_CREATED;
+ } else {
+ d = to_defact(pc);
+ if (!ovr) {
+ tcf_simp_release(d, bind);
+ return -EEXIST;
+ }
+ realloc_defdata(d, datalen, defdata);
+ }
+
+ spin_lock_bh(&d->tcf_lock);
+ d->tcf_action = parm->action;
+ spin_unlock_bh(&d->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &simp_hash_info);
+ return ret;
+}
+
+static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_defact *d = a->priv;
+
+ if (d)
+ return tcf_simp_release(d, bind);
+ return 0;
+}
+
+static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb->tail;
+ struct tcf_defact *d = a->priv;
+ struct tc_defact opt;
+ struct tcf_t t;
+
+ opt.index = d->tcf_index;
+ opt.refcnt = d->tcf_refcnt - ref;
+ opt.bindcnt = d->tcf_bindcnt - bind;
+ opt.action = d->tcf_action;
+ RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+ RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
+ t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+ RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
}
static struct tc_action_ops act_simp_ops = {
- .kind = "simple",
- .type = TCA_ACT_SIMP,
- .capab = TCA_CAP_NONE,
- .owner = THIS_MODULE,
- .act = tcf_simp,
- tca_use_default_ops
+ .kind = "simple",
+ .hinfo = &simp_hash_info,
+ .type = TCA_ACT_SIMP,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_simp,
+ .dump = tcf_simp_dump,
+ .cleanup = tcf_simp_cleanup,
+ .init = tcf_simp_init,
+ .walk = tcf_generic_walker,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b4d89fbb3782..37a184021647 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -17,7 +17,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -402,7 +401,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return skb->len;
- read_lock_bh(&qdisc_tree_lock);
+ read_lock(&qdisc_tree_lock);
if (!tcm->tcm_parent)
q = dev->qdisc_sleeping;
else
@@ -459,7 +458,7 @@ errout:
if (cl)
cops->put(q, cl);
out:
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
dev_put(dev);
return skb->len;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index dfb300bb6baa..09fda68c8b39 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -9,7 +9,6 @@
* Authors: Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -179,25 +178,23 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
err = -ENOBUFS;
if (head == NULL) {
- head = kmalloc(sizeof(*head), GFP_KERNEL);
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
if (head == NULL)
goto errout;
- memset(head, 0, sizeof(*head));
INIT_LIST_HEAD(&head->flist);
tp->root = head;
}
- f = kmalloc(sizeof(*f), GFP_KERNEL);
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
if (f == NULL)
goto errout;
- memset(f, 0, sizeof(*f));
err = -EINVAL;
if (handle)
f->handle = handle;
else {
- int i = 0x80000000;
+ unsigned int i = 0x80000000;
do {
if (++head->hgenerator == 0x7FFFFFFF)
head->hgenerator = 1;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 75470486e405..e54acc6bcccd 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -18,7 +18,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -51,6 +50,7 @@
struct fw_head
{
struct fw_filter *ht[HTSIZE];
+ u32 mask;
};
struct fw_filter
@@ -102,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct fw_filter *f;
int r;
#ifdef CONFIG_NETFILTER
- u32 id = skb->nfmark;
+ u32 id = skb->nfmark & head->mask;
#else
u32 id = 0;
#endif
@@ -210,7 +210,9 @@ static int
fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
struct rtattr **tb, struct rtattr **tca, unsigned long base)
{
+ struct fw_head *head = (struct fw_head *)tp->root;
struct tcf_exts e;
+ u32 mask;
int err;
err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
@@ -233,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
}
#endif /* CONFIG_NET_CLS_IND */
+ if (tb[TCA_FW_MASK-1]) {
+ if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+ goto errout;
+ mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+ if (mask != head->mask)
+ goto errout;
+ } else if (head->mask != 0xFFFFFFFF)
+ goto errout;
+
tcf_exts_change(tp, &f->exts, &e);
return 0;
@@ -268,20 +279,26 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
return -EINVAL;
if (head == NULL) {
- head = kmalloc(sizeof(struct fw_head), GFP_KERNEL);
+ u32 mask = 0xFFFFFFFF;
+ if (tb[TCA_FW_MASK-1]) {
+ if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+ return -EINVAL;
+ mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+ }
+
+ head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
if (head == NULL)
return -ENOBUFS;
- memset(head, 0, sizeof(*head));
+ head->mask = mask;
tcf_tree_lock(tp);
tp->root = head;
tcf_tree_unlock(tp);
}
- f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
if (f == NULL)
return -ENOBUFS;
- memset(f, 0, sizeof(*f));
f->id = handle;
@@ -333,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
static int fw_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
+ struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f = (struct fw_filter*)fh;
unsigned char *b = skb->tail;
struct rtattr *rta;
@@ -354,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
if (strlen(f->indev))
RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
#endif /* CONFIG_NET_CLS_IND */
+ if (head->mask != 0xFFFFFFFF)
+ RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
goto rtattr_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 520ff716dab2..d3aea730d4c8 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -10,7 +10,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
@@ -397,10 +396,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
h1 = to_hash(nhandle);
if ((b = head->table[h1]) == NULL) {
err = -ENOBUFS;
- b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+ b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
goto errout;
- memset(b, 0, sizeof(*b));
tcf_tree_lock(tp);
head->table[h1] = b;
@@ -476,20 +474,18 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
err = -ENOBUFS;
if (head == NULL) {
- head = kmalloc(sizeof(struct route4_head), GFP_KERNEL);
+ head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL)
goto errout;
- memset(head, 0, sizeof(struct route4_head));
tcf_tree_lock(tp);
tp->root = head;
tcf_tree_unlock(tp);
}
- f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL);
+ f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
if (f == NULL)
goto errout;
- memset(f, 0, sizeof(*f));
err = route4_set_parms(tp, base, f, handle, head, tb,
tca[TCA_RATE-1], 1);
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 572f06be3b02..6e230ecfba05 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -65,7 +65,6 @@
Well, as result, despite its simplicity, we get a pretty
powerful classification engine. */
-#include <linux/config.h>
struct rsvp_head
{
@@ -241,9 +240,8 @@ static int rsvp_init(struct tcf_proto *tp)
{
struct rsvp_head *data;
- data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
+ data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
if (data) {
- memset(data, 0, sizeof(struct rsvp_head));
tp->root = data;
return 0;
}
@@ -447,11 +445,10 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
goto errout2;
err = -ENOBUFS;
- f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
+ f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
if (f == NULL)
goto errout2;
- memset(f, 0, sizeof(*f));
h2 = 16;
if (tb[TCA_RSVP_SRC-1]) {
err = -EINVAL;
@@ -533,10 +530,9 @@ insert:
/* No session found. Create new one. */
err = -ENOBUFS;
- s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
+ s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
if (s == NULL)
goto errout;
- memset(s, 0, sizeof(*s));
memcpy(s->dst, dst, sizeof(s->dst));
if (pinfo) {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9f921174c8ab..5af8a59e1503 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -4,7 +4,6 @@
* Written 1998,1999 by Werner Almesberger, EPFL ICA
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -149,11 +148,10 @@ static int tcindex_init(struct tcf_proto *tp)
struct tcindex_data *p;
DPRINTK("tcindex_init(tp %p)\n",tp);
- p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
+ p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL);
if (!p)
return -ENOMEM;
- memset(p, 0, sizeof(*p));
p->mask = 0xffff;
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
@@ -297,16 +295,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
err = -ENOMEM;
if (!cp.perfect && !cp.h) {
if (valid_perfect_hash(&cp)) {
- cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL);
+ cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
if (!cp.perfect)
goto errout;
- memset(cp.perfect, 0, cp.hash * sizeof(*r));
balloc = 1;
} else {
- cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL);
+ cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
if (!cp.h)
goto errout;
- memset(cp.h, 0, cp.hash * sizeof(f));
balloc = 2;
}
}
@@ -317,10 +313,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
if (r == &new_filter_result) {
- f = kmalloc(sizeof(*f), GFP_KERNEL);
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto errout_alloc;
- memset(f, 0, sizeof(*f));
}
if (tb[TCA_TCINDEX_CLASSID-1]) {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 78e052591fa9..0a6cfa0005be 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -33,7 +33,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -308,23 +307,21 @@ static int u32_init(struct tcf_proto *tp)
if (tp_c->q == tp->q)
break;
- root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
+ root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
return -ENOBUFS;
- memset(root_ht, 0, sizeof(*root_ht));
root_ht->divisor = 0;
root_ht->refcnt++;
root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
root_ht->prio = tp->prio;
if (tp_c == NULL) {
- tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
+ tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
if (tp_c == NULL) {
kfree(root_ht);
return -ENOBUFS;
}
- memset(tp_c, 0, sizeof(*tp_c));
tp_c->q = tp->q;
tp_c->next = u32_list;
u32_list = tp_c;
@@ -572,10 +569,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
if (handle == 0)
return -ENOMEM;
}
- ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+ ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
- memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
ht->tp_c = tp_c;
ht->refcnt = 0;
ht->divisor = divisor;
@@ -618,18 +614,16 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
s = RTA_DATA(tb[TCA_U32_SEL-1]);
- n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+ n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
if (n == NULL)
return -ENOBUFS;
- memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
#ifdef CONFIG_CLS_U32_PERF
- n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
+ n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
if (n->pf == NULL) {
kfree(n);
return -ENOBUFS;
}
- memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64));
#endif
memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
@@ -802,7 +796,7 @@ static int __init init_u32(void)
{
printk("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
- printk(" Perfomance counters on\n");
+ printk(" Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_POLICE
printk(" OLD policer on \n");
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bf1f00f8b1bf..8ed93c39b4ea 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -9,7 +9,6 @@
* Authors: Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 700844d49d79..61e3b740ab1a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -58,7 +58,6 @@
* only available if that subsytem is enabled in the kernel.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -774,10 +773,9 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
goto errout;
- meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (meta == NULL)
goto errout;
- memset(meta, 0, sizeof(*meta));
memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 71ea926a9f09..cc80babfd79f 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -9,7 +9,6 @@
* Authors: Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 77beabc91fa3..aa17d8f7c4c8 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -9,7 +9,6 @@
* Authors: Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 34e7e51e601e..e3ddfce0ac8d 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -12,7 +12,6 @@
* Based on net/sched/cls_u32.c
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5cb956b721e8..0fd0768a17c6 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -81,7 +81,6 @@
* open up a beer to watch the compilation going.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -322,10 +321,9 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
list_len = RTA_PAYLOAD(rt_list);
matches_len = tree_hdr->nmatches * sizeof(*em);
- tree->matches = kmalloc(matches_len, GFP_KERNEL);
+ tree->matches = kzalloc(matches_len, GFP_KERNEL);
if (tree->matches == NULL)
goto errout;
- memset(tree->matches, 0, matches_len);
/* We do not use rtattr_parse_nested here because the maximum
* number of attributes is unknown. This saves us the allocation
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
index 5d3ae03e22a7..0ebc98e9be2d 100644
--- a/net/sched/estimator.c
+++ b/net/sched/estimator.c
@@ -139,11 +139,10 @@ int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct r
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
- est = kmalloc(sizeof(*est), GFP_KERNEL);
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
- memset(est, 0, sizeof(*est));
est->interval = parm->interval + 2;
est->stats = stats;
est->stats_lock = stats_lock;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 31570b9a6e9a..0b6489291140 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -15,7 +15,6 @@
* Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -196,14 +195,14 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
- read_lock_bh(&qdisc_tree_lock);
+ read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q->handle == handle) {
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
return q;
}
}
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
return NULL;
}
@@ -431,7 +430,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
}
#endif
- err = -EINVAL;
+ err = -ENOENT;
if (ops == NULL)
goto err_out;
@@ -838,7 +837,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (idx > s_idx)
s_q_idx = 0;
- read_lock_bh(&qdisc_tree_lock);
+ read_lock(&qdisc_tree_lock);
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
@@ -847,12 +846,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
goto done;
}
q_idx++;
}
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
}
done:
@@ -1075,7 +1074,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- read_lock_bh(&qdisc_tree_lock);
+ read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
@@ -1097,7 +1096,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
break;
t++;
}
- read_unlock_bh(&qdisc_tree_lock);
+ read_unlock(&qdisc_tree_lock);
cb->args[0] = t;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ac7cb60d1e25..dbf44da0912f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -3,7 +3,6 @@
/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/string.h>
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 81f0b8346d17..cb0c456aa349 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -11,7 +11,6 @@
* Note: Quantum tunneling is not supported.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6cd81708bf71..bac881bfe362 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -10,7 +10,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -1927,10 +1926,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
}
err = -ENOBUFS;
- cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+ cl = kzalloc(sizeof(*cl), GFP_KERNEL);
if (cl == NULL)
goto failure;
- memset(cl, 0, sizeof(*cl));
cl->R_tab = rtab;
rtab = NULL;
cl->refcnt = 1;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f6320ca70493..11c8a2119b96 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -3,7 +3,6 @@
/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 033083bf0e74..c2689f4ba8de 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -9,7 +9,6 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 74d4a1dceeec..88c6a99ce53c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -14,7 +14,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -46,11 +45,10 @@
The idea is the following:
- enqueue, dequeue are serialized via top level device
spinlock dev->queue_lock.
- - tree walking is protected by read_lock_bh(qdisc_tree_lock)
+ - tree walking is protected by read_lock(qdisc_tree_lock)
and this lock is used only in process context.
- - updates to tree are made under rtnl semaphore or
- from softirq context (__qdisc_destroy rcu-callback)
- hence this lock needs local bh disabling.
+ - updates to tree are made only under rtnl semaphore,
+ hence this lock may be made without local bh disabling.
qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
*/
@@ -58,14 +56,14 @@ DEFINE_RWLOCK(qdisc_tree_lock);
void qdisc_lock_tree(struct net_device *dev)
{
- write_lock_bh(&qdisc_tree_lock);
+ write_lock(&qdisc_tree_lock);
spin_lock_bh(&dev->queue_lock);
}
void qdisc_unlock_tree(struct net_device *dev)
{
spin_unlock_bh(&dev->queue_lock);
- write_unlock_bh(&qdisc_tree_lock);
+ write_unlock(&qdisc_tree_lock);
}
/*
@@ -239,9 +237,7 @@ void __netdev_watchdog_up(struct net_device *dev)
static void dev_watchdog_up(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
__netdev_watchdog_up(dev);
- netif_tx_unlock_bh(dev);
}
static void dev_watchdog_down(struct net_device *dev)
@@ -433,10 +429,9 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
size = QDISC_ALIGN(sizeof(*sch));
size += ops->priv_size + (QDISC_ALIGNTO - 1);
- p = kmalloc(size, GFP_KERNEL);
+ p = kzalloc(size, GFP_KERNEL);
if (!p)
goto errout;
- memset(p, 0, size);
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p;
@@ -487,20 +482,6 @@ void qdisc_reset(struct Qdisc *qdisc)
static void __qdisc_destroy(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
- struct Qdisc_ops *ops = qdisc->ops;
-
-#ifdef CONFIG_NET_ESTIMATOR
- gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
-#endif
- write_lock(&qdisc_tree_lock);
- if (ops->reset)
- ops->reset(qdisc);
- if (ops->destroy)
- ops->destroy(qdisc);
- write_unlock(&qdisc_tree_lock);
- module_put(ops->owner);
-
- dev_put(qdisc->dev);
kfree((char *) qdisc - qdisc->padded);
}
@@ -508,32 +489,23 @@ static void __qdisc_destroy(struct rcu_head *head)
void qdisc_destroy(struct Qdisc *qdisc)
{
- struct list_head cql = LIST_HEAD_INIT(cql);
- struct Qdisc *cq, *q, *n;
+ struct Qdisc_ops *ops = qdisc->ops;
if (qdisc->flags & TCQ_F_BUILTIN ||
- !atomic_dec_and_test(&qdisc->refcnt))
+ !atomic_dec_and_test(&qdisc->refcnt))
return;
- if (!list_empty(&qdisc->list)) {
- if (qdisc->ops->cl_ops == NULL)
- list_del(&qdisc->list);
- else
- list_move(&qdisc->list, &cql);
- }
-
- /* unlink inner qdiscs from dev->qdisc_list immediately */
- list_for_each_entry(cq, &cql, list)
- list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list)
- if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) {
- if (q->ops->cl_ops == NULL)
- list_del_init(&q->list);
- else
- list_move_tail(&q->list, &cql);
- }
- list_for_each_entry_safe(cq, n, &cql, list)
- list_del_init(&cq->list);
+ list_del(&qdisc->list);
+#ifdef CONFIG_NET_ESTIMATOR
+ gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+#endif
+ if (ops->reset)
+ ops->reset(qdisc);
+ if (ops->destroy)
+ ops->destroy(qdisc);
+ module_put(ops->owner);
+ dev_put(qdisc->dev);
call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
@@ -553,15 +525,15 @@ void dev_activate(struct net_device *dev)
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
- write_lock_bh(&qdisc_tree_lock);
+ write_lock(&qdisc_tree_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
- write_unlock_bh(&qdisc_tree_lock);
+ write_unlock(&qdisc_tree_lock);
} else {
qdisc = &noqueue_qdisc;
}
- write_lock_bh(&qdisc_tree_lock);
+ write_lock(&qdisc_tree_lock);
dev->qdisc_sleeping = qdisc;
- write_unlock_bh(&qdisc_tree_lock);
+ write_unlock(&qdisc_tree_lock);
}
if (!netif_carrier_ok(dev))
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 29a2dd9f3029..18e81a8ffb01 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -18,7 +18,6 @@
* For all the glorious comments look at include/net/red.h
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -407,10 +406,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched_data *q;
if (table->tab[dp] == NULL) {
- table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
+ table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
if (table->tab[dp] == NULL)
return -ENOMEM;
- memset(table->tab[dp], 0, sizeof(*q));
}
q = table->tab[dp];
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index f1c7bd29f2cd..6a6735a2ed35 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -50,7 +50,6 @@
*/
#include <linux/kernel.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -1124,10 +1123,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (rsc == NULL && fsc == NULL)
return -EINVAL;
- cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
+ cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL);
if (cl == NULL)
return -ENOBUFS;
- memset(cl, 0, sizeof(struct hfsc_class));
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, 0);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3ec95df4a85e..6c058e3660c0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1,4 +1,4 @@
-/* vim: ts=8 sw=8
+/*
* net/sched/sch_htb.c Hierarchical token bucket, feed tree version
*
* This program is free software; you can redistribute it and/or
@@ -27,7 +27,6 @@
*
* $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -69,218 +68,165 @@
one less than their parent.
*/
-#define HTB_HSIZE 16 /* classid hash size */
-#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */
-#define HTB_RATECM 1 /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
-#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
+#define HTB_HSIZE 16 /* classid hash size */
+#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#define HTB_RATECM 1 /* whether to use rate computer */
+#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
-/* debugging support; S is subsystem, these are defined:
- 0 - netlink messages
- 1 - enqueue
- 2 - drop & requeue
- 3 - dequeue main
- 4 - dequeue one prio DRR part
- 5 - dequeue class accounting
- 6 - class overlimit status computation
- 7 - hint tree
- 8 - event queue
- 10 - rate estimator
- 11 - classifier
- 12 - fast dequeue cache
-
- L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
- q->debug uint32 contains 16 2-bit fields one for subsystem starting
- from LSB
- */
-#ifdef HTB_DEBUG
-#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
-#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
- printk(KERN_DEBUG FMT,##ARG)
-#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
-#define HTB_PASSQ q,
-#define HTB_ARGQ struct htb_sched *q,
-#define static
-#undef __inline__
-#define __inline__
-#undef inline
-#define inline
-#define HTB_CMAGIC 0xFEFAFEF1
-#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
- if ((N)->rb_color == -1) break; \
- rb_erase(N,R); \
- (N)->rb_color = -1; } while (0)
-#else
-#define HTB_DBG_COND(S,L) (0)
-#define HTB_DBG(S,L,FMT,ARG...)
-#define HTB_PASSQ
-#define HTB_ARGQ
-#define HTB_CHCL(cl)
-#define htb_safe_rb_erase(N,R) rb_erase(N,R)
-#endif
-
-
/* used internaly to keep status of single class */
enum htb_cmode {
- HTB_CANT_SEND, /* class can't send and can't borrow */
- HTB_MAY_BORROW, /* class can't send but may borrow */
- HTB_CAN_SEND /* class can send */
+ HTB_CANT_SEND, /* class can't send and can't borrow */
+ HTB_MAY_BORROW, /* class can't send but may borrow */
+ HTB_CAN_SEND /* class can send */
};
/* interior & leaf nodes; props specific to leaves are marked L: */
-struct htb_class
-{
-#ifdef HTB_DEBUG
- unsigned magic;
-#endif
- /* general class parameters */
- u32 classid;
- struct gnet_stats_basic bstats;
- struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
- struct tc_htb_xstats xstats;/* our special stats */
- int refcnt; /* usage count of this class */
+struct htb_class {
+ /* general class parameters */
+ u32 classid;
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ struct gnet_stats_rate_est rate_est;
+ struct tc_htb_xstats xstats; /* our special stats */
+ int refcnt; /* usage count of this class */
#ifdef HTB_RATECM
- /* rate measurement counters */
- unsigned long rate_bytes,sum_bytes;
- unsigned long rate_packets,sum_packets;
+ /* rate measurement counters */
+ unsigned long rate_bytes, sum_bytes;
+ unsigned long rate_packets, sum_packets;
#endif
- /* topology */
- int level; /* our level (see above) */
- struct htb_class *parent; /* parent class */
- struct list_head hlist; /* classid hash list item */
- struct list_head sibling; /* sibling list item */
- struct list_head children; /* children list */
-
- union {
- struct htb_class_leaf {
- struct Qdisc *q;
- int prio;
- int aprio;
- int quantum;
- int deficit[TC_HTB_MAXDEPTH];
- struct list_head drop_list;
- } leaf;
- struct htb_class_inner {
- struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
- struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
- /* When class changes from state 1->2 and disconnects from
- parent's feed then we lost ptr value and start from the
- first child again. Here we store classid of the
- last valid ptr (used when ptr is NULL). */
- u32 last_ptr_id[TC_HTB_NUMPRIO];
- } inner;
- } un;
- struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
- struct rb_node pq_node; /* node for event queue */
- unsigned long pq_key; /* the same type as jiffies global */
-
- int prio_activity; /* for which prios are we active */
- enum htb_cmode cmode; /* current mode of the class */
-
- /* class attached filters */
- struct tcf_proto *filter_list;
- int filter_cnt;
-
- int warned; /* only one warning about non work conserving .. */
-
- /* token bucket parameters */
- struct qdisc_rate_table *rate; /* rate table of the class itself */
- struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
- long buffer,cbuffer; /* token bucket depth/rate */
- long mbuffer; /* max wait time */
- long tokens,ctokens; /* current number of tokens */
- psched_time_t t_c; /* checkpoint time */
+ /* topology */
+ int level; /* our level (see above) */
+ struct htb_class *parent; /* parent class */
+ struct hlist_node hlist; /* classid hash list item */
+ struct list_head sibling; /* sibling list item */
+ struct list_head children; /* children list */
+
+ union {
+ struct htb_class_leaf {
+ struct Qdisc *q;
+ int prio;
+ int aprio;
+ int quantum;
+ int deficit[TC_HTB_MAXDEPTH];
+ struct list_head drop_list;
+ } leaf;
+ struct htb_class_inner {
+ struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
+ struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
+ /* When class changes from state 1->2 and disconnects from
+ parent's feed then we lost ptr value and start from the
+ first child again. Here we store classid of the
+ last valid ptr (used when ptr is NULL). */
+ u32 last_ptr_id[TC_HTB_NUMPRIO];
+ } inner;
+ } un;
+ struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
+ struct rb_node pq_node; /* node for event queue */
+ unsigned long pq_key; /* the same type as jiffies global */
+
+ int prio_activity; /* for which prios are we active */
+ enum htb_cmode cmode; /* current mode of the class */
+
+ /* class attached filters */
+ struct tcf_proto *filter_list;
+ int filter_cnt;
+
+ int warned; /* only one warning about non work conserving .. */
+
+ /* token bucket parameters */
+ struct qdisc_rate_table *rate; /* rate table of the class itself */
+ struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
+ long buffer, cbuffer; /* token bucket depth/rate */
+ psched_tdiff_t mbuffer; /* max wait time */
+ long tokens, ctokens; /* current number of tokens */
+ psched_time_t t_c; /* checkpoint time */
};
/* TODO: maybe compute rate when size is too large .. or drop ? */
-static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
- int size)
-{
- int slot = size >> rate->rate.cell_log;
- if (slot > 255) {
- cl->xstats.giants++;
- slot = 255;
- }
- return rate->data[slot];
+static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
+ int size)
+{
+ int slot = size >> rate->rate.cell_log;
+ if (slot > 255) {
+ cl->xstats.giants++;
+ slot = 255;
+ }
+ return rate->data[slot];
}
-struct htb_sched
-{
- struct list_head root; /* root classes list */
- struct list_head hash[HTB_HSIZE]; /* hashed by classid */
- struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */
-
- /* self list - roots of self generating tree */
- struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
- int row_mask[TC_HTB_MAXDEPTH];
- struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
- u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-
- /* self wait list - roots of wait PQs per row */
- struct rb_root wait_pq[TC_HTB_MAXDEPTH];
-
- /* time of nearest event per level (row) */
- unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
- /* cached value of jiffies in dequeue */
- unsigned long jiffies;
-
- /* whether we hit non-work conserving class during this dequeue; we use */
- int nwc_hit; /* this to disable mindelay complaint in dequeue */
-
- int defcls; /* class where unclassified flows go to */
- u32 debug; /* subsystem debug levels */
-
- /* filters for qdisc itself */
- struct tcf_proto *filter_list;
- int filter_cnt;
-
- int rate2quantum; /* quant = rate / rate2quantum */
- psched_time_t now; /* cached dequeue time */
- struct timer_list timer; /* send delay timer */
+struct htb_sched {
+ struct list_head root; /* root classes list */
+ struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */
+ struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
+
+ /* self list - roots of self generating tree */
+ struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+ int row_mask[TC_HTB_MAXDEPTH];
+ struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+ u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+
+ /* self wait list - roots of wait PQs per row */
+ struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+
+ /* time of nearest event per level (row) */
+ unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+
+ /* cached value of jiffies in dequeue */
+ unsigned long jiffies;
+
+ /* whether we hit non-work conserving class during this dequeue; we use */
+ int nwc_hit; /* this to disable mindelay complaint in dequeue */
+
+ int defcls; /* class where unclassified flows go to */
+
+ /* filters for qdisc itself */
+ struct tcf_proto *filter_list;
+ int filter_cnt;
+
+ int rate2quantum; /* quant = rate / rate2quantum */
+ psched_time_t now; /* cached dequeue time */
+ struct timer_list timer; /* send delay timer */
#ifdef HTB_RATECM
- struct timer_list rttim; /* rate computer timer */
- int recmp_bucket; /* which hash bucket to recompute next */
+ struct timer_list rttim; /* rate computer timer */
+ int recmp_bucket; /* which hash bucket to recompute next */
#endif
-
- /* non shaped skbs; let them go directly thru */
- struct sk_buff_head direct_queue;
- int direct_qlen; /* max qlen of above */
- long direct_pkts;
+ /* non shaped skbs; let them go directly thru */
+ struct sk_buff_head direct_queue;
+ int direct_qlen; /* max qlen of above */
+
+ long direct_pkts;
};
/* compute hash of size HTB_HSIZE for given handle */
-static __inline__ int htb_hash(u32 h)
+static inline int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
- #error "Declare new hash for your HTB_HSIZE"
+#error "Declare new hash for your HTB_HSIZE"
#endif
- h ^= h>>8; /* stolen from cbq_hash */
- h ^= h>>4;
- return h & 0xf;
+ h ^= h >> 8; /* stolen from cbq_hash */
+ h ^= h >> 4;
+ return h & 0xf;
}
/* find class in global hash table using given handle */
-static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- struct list_head *p;
- if (TC_H_MAJ(handle) != sch->handle)
+ struct hlist_node *p;
+ struct htb_class *cl;
+
+ if (TC_H_MAJ(handle) != sch->handle)
return NULL;
-
- list_for_each (p,q->hash+htb_hash(handle)) {
- struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+
+ hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
if (cl->classid == handle)
return cl;
}
@@ -305,7 +251,8 @@ static inline u32 htb_classid(struct htb_class *cl)
return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl;
@@ -317,8 +264,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
note that nfmark can be used too by attaching filter fw with no
rules in it */
if (skb->priority == sch->handle)
- return HTB_DIRECT; /* X:0 (direct flow) selected */
- if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
+ return HTB_DIRECT; /* X:0 (direct flow) selected */
+ if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
return cl;
*qerr = NET_XMIT_BYPASS;
@@ -327,7 +274,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
+ case TC_ACT_STOLEN:
*qerr = NET_XMIT_SUCCESS;
case TC_ACT_SHOT:
return NULL;
@@ -336,97 +283,44 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
if (result == TC_POLICE_SHOT)
return HTB_DIRECT;
#endif
- if ((cl = (void*)res.class) == NULL) {
+ if ((cl = (void *)res.class) == NULL) {
if (res.classid == sch->handle)
- return HTB_DIRECT; /* X:0 (direct flow) */
- if ((cl = htb_find(res.classid,sch)) == NULL)
- break; /* filter selected invalid classid */
+ return HTB_DIRECT; /* X:0 (direct flow) */
+ if ((cl = htb_find(res.classid, sch)) == NULL)
+ break; /* filter selected invalid classid */
}
if (!cl->level)
- return cl; /* we hit leaf; return it */
+ return cl; /* we hit leaf; return it */
/* we have got inner class; apply inner filter chain */
tcf = cl->filter_list;
}
/* classification failed; try to use default class */
- cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+ cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
if (!cl || cl->level)
- return HTB_DIRECT; /* bad default .. this is safe bet */
+ return HTB_DIRECT; /* bad default .. this is safe bet */
return cl;
}
-#ifdef HTB_DEBUG
-static void htb_next_rb_node(struct rb_node **n);
-#define HTB_DUMTREE(root,memb) if(root) { \
- struct rb_node *n = (root)->rb_node; \
- while (n->rb_left) n = n->rb_left; \
- while (n) { \
- struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
- printk(" %x",cl->classid); htb_next_rb_node (&n); \
- } }
-
-static void htb_debug_dump (struct htb_sched *q)
-{
- int i,p;
- printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
- /* rows */
- for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
- printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
- for (p=0;p<TC_HTB_NUMPRIO;p++) {
- if (!q->row[i][p].rb_node) continue;
- printk(" p%d:",p);
- HTB_DUMTREE(q->row[i]+p,node[p]);
- }
- printk("\n");
- }
- /* classes */
- for (i = 0; i < HTB_HSIZE; i++) {
- struct list_head *l;
- list_for_each (l,q->hash+i) {
- struct htb_class *cl = list_entry(l,struct htb_class,hlist);
- long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
- printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
- "pa=%x f:",
- cl->classid,cl->cmode,cl->tokens,cl->ctokens,
- cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
- cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
- if (cl->level)
- for (p=0;p<TC_HTB_NUMPRIO;p++) {
- if (!cl->un.inner.feed[p].rb_node) continue;
- printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
- HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
- }
- printk("\n");
- }
- }
-}
-#endif
/**
* htb_add_to_id_tree - adds class to the round robin list
*
* Routine adds class to the list (actually tree) sorted by classid.
* Make sure that class is not already on such list for given prio.
*/
-static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
- struct htb_class *cl,int prio)
+static void htb_add_to_id_tree(struct rb_root *root,
+ struct htb_class *cl, int prio)
{
struct rb_node **p = &root->rb_node, *parent = NULL;
- HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
-#ifdef HTB_DEBUG
- if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
- HTB_CHCL(cl);
- if (*p) {
- struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
- HTB_CHCL(x);
- }
-#endif
+
while (*p) {
- struct htb_class *c; parent = *p;
+ struct htb_class *c;
+ parent = *p;
c = rb_entry(parent, struct htb_class, node[prio]);
- HTB_CHCL(c);
+
if (cl->classid > c->classid)
p = &parent->rb_right;
- else
+ else
p = &parent->rb_left;
}
rb_link_node(&cl->node[prio], parent, p);
@@ -440,17 +334,11 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
* change its mode in cl->pq_key microseconds. Make sure that class is not
* already in the queue.
*/
-static void htb_add_to_wait_tree (struct htb_sched *q,
- struct htb_class *cl,long delay,int debug_hint)
+static void htb_add_to_wait_tree(struct htb_sched *q,
+ struct htb_class *cl, long delay)
{
struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
- HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
-#ifdef HTB_DEBUG
- if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
- HTB_CHCL(cl);
- if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
- printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
-#endif
+
cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
if (cl->pq_key == q->jiffies)
cl->pq_key++;
@@ -458,13 +346,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
/* update the nearest event cache */
if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
q->near_ev_cache[cl->level] = cl->pq_key;
-
+
while (*p) {
- struct htb_class *c; parent = *p;
+ struct htb_class *c;
+ parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
if (time_after_eq(cl->pq_key, c->pq_key))
p = &parent->rb_right;
- else
+ else
p = &parent->rb_left;
}
rb_link_node(&cl->pq_node, parent, p);
@@ -477,7 +366,7 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
* When we are past last key we return NULL.
* Average complexity is 2 steps per call.
*/
-static void htb_next_rb_node(struct rb_node **n)
+static inline void htb_next_rb_node(struct rb_node **n)
{
*n = rb_next(*n);
}
@@ -488,42 +377,51 @@ static void htb_next_rb_node(struct rb_node **n)
* The class is added to row at priorities marked in mask.
* It does nothing if mask == 0.
*/
-static inline void htb_add_class_to_row(struct htb_sched *q,
- struct htb_class *cl,int mask)
+static inline void htb_add_class_to_row(struct htb_sched *q,
+ struct htb_class *cl, int mask)
{
- HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
- cl->classid,mask,q->row_mask[cl->level]);
- HTB_CHCL(cl);
q->row_mask[cl->level] |= mask;
while (mask) {
int prio = ffz(~mask);
mask &= ~(1 << prio);
- htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+ htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
+ }
+}
+
+/* If this triggers, it is a bug in this code, but it need not be fatal */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+ if (!RB_EMPTY_NODE(rb)) {
+ WARN_ON(1);
+ } else {
+ rb_erase(rb, root);
+ RB_CLEAR_NODE(rb);
}
}
+
/**
* htb_remove_class_from_row - removes class from its row
*
* The class is removed from row at priorities marked in mask.
* It does nothing if mask == 0.
*/
-static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
- struct htb_class *cl,int mask)
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+ struct htb_class *cl, int mask)
{
int m = 0;
- HTB_CHCL(cl);
+
while (mask) {
int prio = ffz(~mask);
+
mask &= ~(1 << prio);
- if (q->ptr[cl->level][prio] == cl->node+prio)
- htb_next_rb_node(q->ptr[cl->level]+prio);
- htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
- if (!q->row[cl->level][prio].rb_node)
+ if (q->ptr[cl->level][prio] == cl->node + prio)
+ htb_next_rb_node(q->ptr[cl->level] + prio);
+
+ htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
+ if (!q->row[cl->level][prio].rb_node)
m |= 1 << prio;
}
- HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
- cl->classid,mask,q->row_mask[cl->level],m);
q->row_mask[cl->level] &= ~m;
}
@@ -534,34 +432,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
* for priorities it is participating on. cl->cmode must be new
* (activated) mode. It does nothing if cl->prio_activity == 0.
*/
-static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;
- long m,mask = cl->prio_activity;
- HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
- HTB_CHCL(cl);
+ long m, mask = cl->prio_activity;
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
- HTB_CHCL(p);
- m = mask; while (m) {
+ m = mask;
+ while (m) {
int prio = ffz(~m);
m &= ~(1 << prio);
-
+
if (p->un.inner.feed[prio].rb_node)
/* parent already has its feed in use so that
reset bit in mask as parent is already ok */
mask &= ~(1 << prio);
-
- htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+
+ htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
}
- HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
- p->classid,p->prio_activity,mask,p->cmode);
p->prio_activity |= mask;
- cl = p; p = cl->parent;
- HTB_CHCL(cl);
+ cl = p;
+ p = cl->parent;
+
}
if (cl->cmode == HTB_CAN_SEND && mask)
- htb_add_class_to_row(q,cl,mask);
+ htb_add_class_to_row(q, cl, mask);
}
/**
@@ -574,39 +469,52 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;
- long m,mask = cl->prio_activity;
- HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
- HTB_CHCL(cl);
+ long m, mask = cl->prio_activity;
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
- m = mask; mask = 0;
+ m = mask;
+ mask = 0;
while (m) {
int prio = ffz(~m);
m &= ~(1 << prio);
-
- if (p->un.inner.ptr[prio] == cl->node+prio) {
+
+ if (p->un.inner.ptr[prio] == cl->node + prio) {
/* we are removing child which is pointed to from
parent feed - forget the pointer but remember
classid */
p->un.inner.last_ptr_id[prio] = cl->classid;
p->un.inner.ptr[prio] = NULL;
}
-
- htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
-
- if (!p->un.inner.feed[prio].rb_node)
+
+ htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+ if (!p->un.inner.feed[prio].rb_node)
mask |= 1 << prio;
}
- HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
- p->classid,p->prio_activity,mask,p->cmode);
+
p->prio_activity &= ~mask;
- cl = p; p = cl->parent;
- HTB_CHCL(cl);
+ cl = p;
+ p = cl->parent;
+
}
- if (cl->cmode == HTB_CAN_SEND && mask)
- htb_remove_class_from_row(q,cl,mask);
+ if (cl->cmode == HTB_CAN_SEND && mask)
+ htb_remove_class_from_row(q, cl, mask);
}
+#if HTB_HYSTERESIS
+static inline long htb_lowater(const struct htb_class *cl)
+{
+ return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)
+{
+ return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
+}
+#else
+#define htb_lowater(cl) (0)
+#define htb_hiwater(cl) (0)
+#endif
+
/**
* htb_class_mode - computes and returns current class mode
*
@@ -618,28 +526,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
* 0 .. -cl->{c,}buffer range. It is meant to limit number of
* mode transitions per time unit. The speed gain is about 1/6.
*/
-static __inline__ enum htb_cmode
-htb_class_mode(struct htb_class *cl,long *diff)
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
{
- long toks;
+ long toks;
- if ((toks = (cl->ctokens + *diff)) < (
-#if HTB_HYSTERESIS
- cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
-#endif
- 0)) {
- *diff = -toks;
- return HTB_CANT_SEND;
- }
- if ((toks = (cl->tokens + *diff)) >= (
-#if HTB_HYSTERESIS
- cl->cmode == HTB_CAN_SEND ? -cl->buffer :
-#endif
- 0))
- return HTB_CAN_SEND;
+ if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+ *diff = -toks;
+ return HTB_CANT_SEND;
+ }
+
+ if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+ return HTB_CAN_SEND;
- *diff = -toks;
- return HTB_MAY_BORROW;
+ *diff = -toks;
+ return HTB_MAY_BORROW;
}
/**
@@ -651,24 +552,21 @@ htb_class_mode(struct htb_class *cl,long *diff)
* be different from old one and cl->pq_key has to be valid if changing
* to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
*/
-static void
+static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
-{
- enum htb_cmode new_mode = htb_class_mode(cl,diff);
-
- HTB_CHCL(cl);
- HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
+{
+ enum htb_cmode new_mode = htb_class_mode(cl, diff);
if (new_mode == cl->cmode)
- return;
-
- if (cl->prio_activity) { /* not necessary: speed optimization */
- if (cl->cmode != HTB_CANT_SEND)
- htb_deactivate_prios(q,cl);
+ return;
+
+ if (cl->prio_activity) { /* not necessary: speed optimization */
+ if (cl->cmode != HTB_CANT_SEND)
+ htb_deactivate_prios(q, cl);
cl->cmode = new_mode;
- if (new_mode != HTB_CANT_SEND)
- htb_activate_prios(q,cl);
- } else
+ if (new_mode != HTB_CANT_SEND)
+ htb_activate_prios(q, cl);
+ } else
cl->cmode = new_mode;
}
@@ -679,14 +577,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
* for the prio. It can be called on already active leaf safely.
* It also adds leaf into droplist.
*/
-static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
- HTB_CHCL(cl);
+
if (!cl->prio_activity) {
cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
- htb_activate_prios(q,cl);
- list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+ htb_activate_prios(q, cl);
+ list_add_tail(&cl->un.leaf.drop_list,
+ q->drops + cl->un.leaf.aprio);
}
}
@@ -696,120 +595,120 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
* Make sure that leaf is active. In the other words it can't be called
* with non-active leaf. It also removes class from the drop list.
*/
-static __inline__ void
-htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
BUG_TRAP(cl->prio_activity);
- HTB_CHCL(cl);
- htb_deactivate_prios(q,cl);
+
+ htb_deactivate_prios(q, cl);
cl->prio_activity = 0;
list_del_init(&cl->un.leaf.drop_list);
}
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- int ret;
- struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = htb_classify(skb,sch,&ret);
-
- if (cl == HTB_DIRECT) {
- /* enqueue to helper queue */
- if (q->direct_queue.qlen < q->direct_qlen) {
- __skb_queue_tail(&q->direct_queue, skb);
- q->direct_pkts++;
- } else {
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
- }
+ int ret;
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = htb_classify(skb, sch, &ret);
+
+ if (cl == HTB_DIRECT) {
+ /* enqueue to helper queue */
+ if (q->direct_queue.qlen < q->direct_qlen) {
+ __skb_queue_tail(&q->direct_queue, skb);
+ q->direct_pkts++;
+ } else {
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+ }
#ifdef CONFIG_NET_CLS_ACT
- } else if (!cl) {
- if (ret == NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb (skb);
- return ret;
+ } else if (!cl) {
+ if (ret == NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
#endif
- } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
- } else {
- cl->bstats.packets++; cl->bstats.bytes += skb->len;
- htb_activate (q,cl);
- }
-
- sch->q.qlen++;
- sch->bstats.packets++; sch->bstats.bytes += skb->len;
- HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
- return NET_XMIT_SUCCESS;
+ } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
+ NET_XMIT_SUCCESS) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ return NET_XMIT_DROP;
+ } else {
+ cl->bstats.packets++;
+ cl->bstats.bytes += skb->len;
+ htb_activate(q, cl);
+ }
+
+ sch->q.qlen++;
+ sch->bstats.packets++;
+ sch->bstats.bytes += skb->len;
+ return NET_XMIT_SUCCESS;
}
/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
- struct htb_sched *q = qdisc_priv(sch);
- int ret = NET_XMIT_SUCCESS;
- struct htb_class *cl = htb_classify(skb,sch, &ret);
- struct sk_buff *tskb;
-
- if (cl == HTB_DIRECT || !cl) {
- /* enqueue to helper queue */
- if (q->direct_queue.qlen < q->direct_qlen && cl) {
- __skb_queue_head(&q->direct_queue, skb);
- } else {
- __skb_queue_head(&q->direct_queue, skb);
- tskb = __skb_dequeue_tail(&q->direct_queue);
- kfree_skb (tskb);
- sch->qstats.drops++;
- return NET_XMIT_CN;
- }
- } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
- } else
- htb_activate (q,cl);
-
- sch->q.qlen++;
- sch->qstats.requeues++;
- HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
- return NET_XMIT_SUCCESS;
+ struct htb_sched *q = qdisc_priv(sch);
+ int ret = NET_XMIT_SUCCESS;
+ struct htb_class *cl = htb_classify(skb, sch, &ret);
+ struct sk_buff *tskb;
+
+ if (cl == HTB_DIRECT || !cl) {
+ /* enqueue to helper queue */
+ if (q->direct_queue.qlen < q->direct_qlen && cl) {
+ __skb_queue_head(&q->direct_queue, skb);
+ } else {
+ __skb_queue_head(&q->direct_queue, skb);
+ tskb = __skb_dequeue_tail(&q->direct_queue);
+ kfree_skb(tskb);
+ sch->qstats.drops++;
+ return NET_XMIT_CN;
+ }
+ } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+ NET_XMIT_SUCCESS) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ return NET_XMIT_DROP;
+ } else
+ htb_activate(q, cl);
+
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ return NET_XMIT_SUCCESS;
}
static void htb_timer(unsigned long arg)
{
- struct Qdisc *sch = (struct Qdisc*)arg;
- sch->flags &= ~TCQ_F_THROTTLED;
- wmb();
- netif_schedule(sch->dev);
+ struct Qdisc *sch = (struct Qdisc *)arg;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ wmb();
+ netif_schedule(sch->dev);
}
#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
{
- struct Qdisc *sch = (struct Qdisc*)arg;
+ struct Qdisc *sch = (struct Qdisc *)arg;
struct htb_sched *q = qdisc_priv(sch);
- struct list_head *p;
+ struct hlist_node *p;
+ struct htb_class *cl;
+
/* lock queue so that we can muck with it */
- HTB_QLOCK(sch);
- HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
+ spin_lock_bh(&sch->dev->queue_lock);
q->rttim.expires = jiffies + HZ;
add_timer(&q->rttim);
/* scan and recompute one bucket at time */
- if (++q->recmp_bucket >= HTB_HSIZE)
+ if (++q->recmp_bucket >= HTB_HSIZE)
q->recmp_bucket = 0;
- list_for_each (p,q->hash+q->recmp_bucket) {
- struct htb_class *cl = list_entry(p,struct htb_class,hlist);
- HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
- cl->classid,cl->sum_bytes,cl->sum_packets);
- RT_GEN (cl->sum_bytes,cl->rate_bytes);
- RT_GEN (cl->sum_packets,cl->rate_packets);
+
+ hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
+ RT_GEN(cl->sum_bytes, cl->rate_bytes);
+ RT_GEN(cl->sum_packets, cl->rate_packets);
}
- HTB_QUNLOCK(sch);
+ spin_unlock_bh(&sch->dev->queue_lock);
}
#endif
@@ -824,12 +723,11 @@ static void htb_rate_timer(unsigned long arg)
* CAN_SEND) because we can use more precise clock that event queue here.
* In such case we remove class from event queue first.
*/
-static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
- int level,int bytes)
-{
- long toks,diff;
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+ int level, int bytes)
+{
+ long toks, diff;
enum htb_cmode old_mode;
- HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
if (toks > cl->B) toks = cl->B; \
@@ -838,47 +736,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
cl->T = toks
while (cl) {
- HTB_CHCL(cl);
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
- if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
- if (net_ratelimit())
- printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
- cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
- q->now.tv_sec * 1000000ULL + q->now.tv_usec,
- cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
- (unsigned long long) q->now,
- (unsigned long long) cl->t_c,
-#endif
- q->jiffies);
- diff = 1000;
- }
-#endif
+ diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
if (cl->level >= level) {
- if (cl->level == level) cl->xstats.lends++;
- HTB_ACCNT (tokens,buffer,rate);
+ if (cl->level == level)
+ cl->xstats.lends++;
+ HTB_ACCNT(tokens, buffer, rate);
} else {
cl->xstats.borrows++;
- cl->tokens += diff; /* we moved t_c; update tokens */
+ cl->tokens += diff; /* we moved t_c; update tokens */
}
- HTB_ACCNT (ctokens,cbuffer,ceil);
+ HTB_ACCNT(ctokens, cbuffer, ceil);
cl->t_c = q->now;
- HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
- old_mode = cl->cmode; diff = 0;
- htb_change_class_mode(q,cl,&diff);
+ old_mode = cl->cmode;
+ diff = 0;
+ htb_change_class_mode(q, cl, &diff);
if (old_mode != cl->cmode) {
if (old_mode != HTB_CAN_SEND)
- htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+ htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
if (cl->cmode != HTB_CAN_SEND)
- htb_add_to_wait_tree (q,cl,diff,1);
+ htb_add_to_wait_tree(q, cl, diff);
}
-
#ifdef HTB_RATECM
/* update rate counters */
- cl->sum_bytes += bytes; cl->sum_packets++;
+ cl->sum_bytes += bytes;
+ cl->sum_packets++;
#endif
/* update byte stats except for leaves which are already updated */
@@ -897,60 +779,46 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
* next pending event (0 for no event in pq).
* Note: Aplied are events whose have cl->pq_key <= jiffies.
*/
-static long htb_do_events(struct htb_sched *q,int level)
+static long htb_do_events(struct htb_sched *q, int level)
{
int i;
- HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
- level,q->wait_pq[level].rb_node,q->row_mask[level]);
+
for (i = 0; i < 500; i++) {
struct htb_class *cl;
long diff;
struct rb_node *p = q->wait_pq[level].rb_node;
- if (!p) return 0;
- while (p->rb_left) p = p->rb_left;
+ if (!p)
+ return 0;
+ while (p->rb_left)
+ p = p->rb_left;
cl = rb_entry(p, struct htb_class, pq_node);
if (time_after(cl->pq_key, q->jiffies)) {
- HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
return cl->pq_key - q->jiffies;
}
- htb_safe_rb_erase(p,q->wait_pq+level);
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
- if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
- if (net_ratelimit())
- printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
- cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
- q->now.tv_sec * 1000000ULL + q->now.tv_usec,
- cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
- (unsigned long long) q->now,
- (unsigned long long) cl->t_c,
-#endif
- q->jiffies);
- diff = 1000;
- }
-#endif
- htb_change_class_mode(q,cl,&diff);
+ htb_safe_rb_erase(p, q->wait_pq + level);
+ diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+ htb_change_class_mode(q, cl, &diff);
if (cl->cmode != HTB_CAN_SEND)
- htb_add_to_wait_tree (q,cl,diff,2);
+ htb_add_to_wait_tree(q, cl, diff);
}
if (net_ratelimit())
printk(KERN_WARNING "htb: too many events !\n");
- return HZ/10;
+ return HZ / 10;
}
/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
is no such one exists. */
-static struct rb_node *
-htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+ u32 id)
{
struct rb_node *r = NULL;
while (n) {
- struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
- if (id == cl->classid) return n;
-
+ struct htb_class *cl =
+ rb_entry(n, struct htb_class, node[prio]);
+ if (id == cl->classid)
+ return n;
+
if (id > cl->classid) {
n = n->rb_right;
} else {
@@ -966,49 +834,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
*
* Find leaf where current feed pointers points to.
*/
-static struct htb_class *
-htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+ struct rb_node **pptr, u32 * pid)
{
int i;
struct {
struct rb_node *root;
struct rb_node **pptr;
u32 *pid;
- } stk[TC_HTB_MAXDEPTH],*sp = stk;
-
+ } stk[TC_HTB_MAXDEPTH], *sp = stk;
+
BUG_TRAP(tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
sp->pid = pid;
for (i = 0; i < 65535; i++) {
- HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
-
- if (!*sp->pptr && *sp->pid) {
+ if (!*sp->pptr && *sp->pid) {
/* ptr was invalidated but id is valid - try to recover
the original or next ptr */
- *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+ *sp->pptr =
+ htb_id_find_next_upper(prio, sp->root, *sp->pid);
}
- *sp->pid = 0; /* ptr is valid now so that remove this hint as it
- can become out of date quickly */
- if (!*sp->pptr) { /* we are at right end; rewind & go up */
+ *sp->pid = 0; /* ptr is valid now so that remove this hint as it
+ can become out of date quickly */
+ if (!*sp->pptr) { /* we are at right end; rewind & go up */
*sp->pptr = sp->root;
- while ((*sp->pptr)->rb_left)
+ while ((*sp->pptr)->rb_left)
*sp->pptr = (*sp->pptr)->rb_left;
if (sp > stk) {
sp--;
- BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
- htb_next_rb_node (sp->pptr);
+ BUG_TRAP(*sp->pptr);
+ if (!*sp->pptr)
+ return NULL;
+ htb_next_rb_node(sp->pptr);
}
} else {
struct htb_class *cl;
- cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
- HTB_CHCL(cl);
- if (!cl->level)
+ cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+ if (!cl->level)
return cl;
(++sp)->root = cl->un.inner.feed[prio].rb_node;
- sp->pptr = cl->un.inner.ptr+prio;
- sp->pid = cl->un.inner.last_ptr_id+prio;
+ sp->pptr = cl->un.inner.ptr + prio;
+ sp->pid = cl->un.inner.last_ptr_id + prio;
}
}
BUG_TRAP(0);
@@ -1017,21 +885,21 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32
/* dequeues packet at given priority and level; call only if
you are sure that there is active class at prio/level */
-static struct sk_buff *
-htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+ int level)
{
struct sk_buff *skb = NULL;
- struct htb_class *cl,*start;
+ struct htb_class *cl, *start;
/* look initial class up in the row */
- start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
- q->ptr[level]+prio,q->last_ptr_id[level]+prio);
-
+ start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+ q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
+
do {
next:
- BUG_TRAP(cl);
- if (!cl) return NULL;
- HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
- prio,level,cl->classid,cl->un.leaf.deficit[level]);
+ BUG_TRAP(cl);
+ if (!cl)
+ return NULL;
/* class can be empty - it is unlikely but can be true if leaf
qdisc drops packets in enqueue routine or if someone used
@@ -1039,64 +907,69 @@ next:
simply deactivate and skip such class */
if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
struct htb_class *next;
- htb_deactivate(q,cl);
+ htb_deactivate(q, cl);
/* row/level might become empty */
if ((q->row_mask[level] & (1 << prio)) == 0)
- return NULL;
-
- next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
- prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+ return NULL;
+
+ next = htb_lookup_leaf(q->row[level] + prio,
+ prio, q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
- if (cl == start) /* fix start if we just deleted it */
+ if (cl == start) /* fix start if we just deleted it */
start = next;
cl = next;
goto next;
}
-
- if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
+
+ skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+ if (likely(skb != NULL))
break;
if (!cl->warned) {
- printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
+ printk(KERN_WARNING
+ "htb: class %X isn't work conserving ?!\n",
+ cl->classid);
cl->warned = 1;
}
q->nwc_hit++;
- htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
- cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
- q->last_ptr_id[level]+prio);
+ htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+ ptr[0]) + prio);
+ cl = htb_lookup_leaf(q->row[level] + prio, prio,
+ q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
} while (cl != start);
if (likely(skb != NULL)) {
if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
- HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
- level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
- htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+ htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+ ptr[0]) + prio);
}
/* this used to be after charge_class but this constelation
gives us slightly better performance */
if (!cl->un.leaf.q->q.qlen)
- htb_deactivate (q,cl);
- htb_charge_class (q,cl,level,skb->len);
+ htb_deactivate(q, cl);
+ htb_charge_class(q, cl, level, skb->len);
}
return skb;
}
-static void htb_delay_by(struct Qdisc *sch,long delay)
+static void htb_delay_by(struct Qdisc *sch, long delay)
{
struct htb_sched *q = qdisc_priv(sch);
- if (delay <= 0) delay = 1;
- if (unlikely(delay > 5*HZ)) {
+ if (delay <= 0)
+ delay = 1;
+ if (unlikely(delay > 5 * HZ)) {
if (net_ratelimit())
printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
- delay = 5*HZ;
+ delay = 5 * HZ;
}
/* why don't use jiffies here ? because expires can be in past */
mod_timer(&q->timer, q->jiffies + delay);
sch->flags |= TCQ_F_THROTTLED;
sch->qstats.overlimits++;
- HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
}
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
@@ -1105,22 +978,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
struct htb_sched *q = qdisc_priv(sch);
int level;
long min_delay;
-#ifdef HTB_DEBUG
- int evs_used = 0;
-#endif
q->jiffies = jiffies;
- HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
- sch->q.qlen);
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
- if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+ skb = __skb_dequeue(&q->direct_queue);
+ if (skb != NULL) {
sch->flags &= ~TCQ_F_THROTTLED;
sch->q.qlen--;
return skb;
}
- if (!sch->q.qlen) goto fin;
+ if (!sch->q.qlen)
+ goto fin;
PSCHED_GET_TIME(q->now);
min_delay = LONG_MAX;
@@ -1130,21 +1000,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
int m;
long delay;
if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
- delay = htb_do_events(q,level);
- q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
-#ifdef HTB_DEBUG
- evs_used++;
-#endif
+ delay = htb_do_events(q, level);
+ q->near_ev_cache[level] =
+ q->jiffies + (delay ? delay : HZ);
} else
- delay = q->near_ev_cache[level] - q->jiffies;
-
- if (delay && min_delay > delay)
+ delay = q->near_ev_cache[level] - q->jiffies;
+
+ if (delay && min_delay > delay)
min_delay = delay;
m = ~q->row_mask[level];
while (m != (int)(-1)) {
- int prio = ffz (m);
+ int prio = ffz(m);
m |= 1 << prio;
- skb = htb_dequeue_tree(q,prio,level);
+ skb = htb_dequeue_tree(q, prio, level);
if (likely(skb != NULL)) {
sch->q.qlen--;
sch->flags &= ~TCQ_F_THROTTLED;
@@ -1152,40 +1020,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
}
}
}
-#ifdef HTB_DEBUG
- if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
- if (min_delay == LONG_MAX) {
- printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
- evs_used,q->jiffies,jiffies);
- htb_debug_dump(q);
- } else
- printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
- "too small rate\n",min_delay);
- }
-#endif
- htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+ htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
fin:
- HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
return skb;
}
/* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
int prio;
for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
struct list_head *p;
- list_for_each (p,q->drops+prio) {
+ list_for_each(p, q->drops + prio) {
struct htb_class *cl = list_entry(p, struct htb_class,
un.leaf.drop_list);
unsigned int len;
- if (cl->un.leaf.q->ops->drop &&
- (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ if (cl->un.leaf.q->ops->drop &&
+ (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
- htb_deactivate (q,cl);
+ htb_deactivate(q, cl);
return len;
}
}
@@ -1195,29 +1051,25 @@ static unsigned int htb_drop(struct Qdisc* sch)
/* reset all classes */
/* always caled under BH & queue lock */
-static void htb_reset(struct Qdisc* sch)
+static void htb_reset(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
int i;
- HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
for (i = 0; i < HTB_HSIZE; i++) {
- struct list_head *p;
- list_for_each (p,q->hash+i) {
- struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+ struct hlist_node *p;
+ struct htb_class *cl;
+
+ hlist_for_each_entry(cl, p, q->hash + i, hlist) {
if (cl->level)
- memset(&cl->un.inner,0,sizeof(cl->un.inner));
+ memset(&cl->un.inner, 0, sizeof(cl->un.inner));
else {
- if (cl->un.leaf.q)
+ if (cl->un.leaf.q)
qdisc_reset(cl->un.leaf.q);
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
-#ifdef HTB_DEBUG
- cl->pq_node.rb_color = -1;
- memset(cl->node,255,sizeof(cl->node));
-#endif
}
}
@@ -1225,12 +1077,12 @@ static void htb_reset(struct Qdisc* sch)
del_timer(&q->timer);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
- memset(q->row,0,sizeof(q->row));
- memset(q->row_mask,0,sizeof(q->row_mask));
- memset(q->wait_pq,0,sizeof(q->wait_pq));
- memset(q->ptr,0,sizeof(q->ptr));
+ memset(q->row, 0, sizeof(q->row));
+ memset(q->row_mask, 0, sizeof(q->row_mask));
+ memset(q->wait_pq, 0, sizeof(q->wait_pq));
+ memset(q->ptr, 0, sizeof(q->ptr));
for (i = 0; i < TC_HTB_NUMPRIO; i++)
- INIT_LIST_HEAD(q->drops+i);
+ INIT_LIST_HEAD(q->drops + i);
}
static int htb_init(struct Qdisc *sch, struct rtattr *opt)
@@ -1239,36 +1091,31 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
struct rtattr *tb[TCA_HTB_INIT];
struct tc_htb_glob *gopt;
int i;
-#ifdef HTB_DEBUG
- printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
- HTB_VER >> 16,HTB_VER & 0xffff);
-#endif
if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
- tb[TCA_HTB_INIT-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+ tb[TCA_HTB_INIT - 1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
return -EINVAL;
}
- gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+ gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
if (gopt->version != HTB_VER >> 16) {
- printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
- HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+ printk(KERN_ERR
+ "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+ HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
return -EINVAL;
}
- q->debug = gopt->debug;
- HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
INIT_LIST_HEAD(&q->root);
for (i = 0; i < HTB_HSIZE; i++)
- INIT_LIST_HEAD(q->hash+i);
+ INIT_HLIST_HEAD(q->hash + i);
for (i = 0; i < TC_HTB_NUMPRIO; i++)
- INIT_LIST_HEAD(q->drops+i);
+ INIT_LIST_HEAD(q->drops + i);
init_timer(&q->timer);
skb_queue_head_init(&q->direct_queue);
q->direct_qlen = sch->dev->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+ if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
q->direct_qlen = 2;
q->timer.function = htb_timer;
q->timer.data = (unsigned long)sch;
@@ -1290,80 +1137,72 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct htb_sched *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb->tail;
struct rtattr *rta;
struct tc_htb_glob gopt;
- HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
- HTB_QLOCK(sch);
+ spin_lock_bh(&sch->dev->queue_lock);
gopt.direct_pkts = q->direct_pkts;
-#ifdef HTB_DEBUG
- if (HTB_DBG_COND(0,2))
- htb_debug_dump(q);
-#endif
gopt.version = HTB_VER;
gopt.rate2quantum = q->rate2quantum;
gopt.defcls = q->defcls;
- gopt.debug = q->debug;
- rta = (struct rtattr*)b;
+ gopt.debug = 0;
+ rta = (struct rtattr *)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
rta->rta_len = skb->tail - b;
- HTB_QUNLOCK(sch);
+ spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
- HTB_QUNLOCK(sch);
+ spin_unlock_bh(&sch->dev->queue_lock);
skb_trim(skb, skb->tail - skb->data);
return -1;
}
static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
- struct sk_buff *skb, struct tcmsg *tcm)
+ struct sk_buff *skb, struct tcmsg *tcm)
{
-#ifdef HTB_DEBUG
- struct htb_sched *q = qdisc_priv(sch);
-#endif
- struct htb_class *cl = (struct htb_class*)arg;
- unsigned char *b = skb->tail;
+ struct htb_class *cl = (struct htb_class *)arg;
+ unsigned char *b = skb->tail;
struct rtattr *rta;
struct tc_htb_opt opt;
- HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
-
- HTB_QLOCK(sch);
+ spin_lock_bh(&sch->dev->queue_lock);
tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
tcm->tcm_handle = cl->classid;
if (!cl->level && cl->un.leaf.q)
tcm->tcm_info = cl->un.leaf.q->handle;
- rta = (struct rtattr*)b;
+ rta = (struct rtattr *)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
- memset (&opt,0,sizeof(opt));
+ memset(&opt, 0, sizeof(opt));
- opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
- opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
- opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
- opt.level = cl->level;
+ opt.rate = cl->rate->rate;
+ opt.buffer = cl->buffer;
+ opt.ceil = cl->ceil->rate;
+ opt.cbuffer = cl->cbuffer;
+ opt.quantum = cl->un.leaf.quantum;
+ opt.prio = cl->un.leaf.prio;
+ opt.level = cl->level;
RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
rta->rta_len = skb->tail - b;
- HTB_QUNLOCK(sch);
+ spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
- HTB_QUNLOCK(sch);
+ spin_unlock_bh(&sch->dev->queue_lock);
skb_trim(skb, b - skb->data);
return -1;
}
static int
-htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
- struct htb_class *cl = (struct htb_class*)arg;
+ struct htb_class *cl = (struct htb_class *)arg;
#ifdef HTB_RATECM
- cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
- cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
+ cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
+ cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
#endif
if (!cl->level && cl->un.leaf.q)
@@ -1380,21 +1219,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
}
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old)
{
- struct htb_class *cl = (struct htb_class*)arg;
+ struct htb_class *cl = (struct htb_class *)arg;
if (cl && !cl->level) {
- if (new == NULL && (new = qdisc_create_dflt(sch->dev,
- &pfifo_qdisc_ops)) == NULL)
- return -ENOBUFS;
+ if (new == NULL && (new = qdisc_create_dflt(sch->dev,
+ &pfifo_qdisc_ops))
+ == NULL)
+ return -ENOBUFS;
sch_tree_lock(sch);
if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
if (cl->prio_activity)
- htb_deactivate (qdisc_priv(sch),cl);
+ htb_deactivate(qdisc_priv(sch), cl);
/* TODO: is it correct ? Why CBQ doesn't do it ? */
- sch->q.qlen -= (*old)->q.qlen;
+ sch->q.qlen -= (*old)->q.qlen;
qdisc_reset(*old);
}
sch_tree_unlock(sch);
@@ -1403,20 +1243,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
return -ENOENT;
}
-static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
- struct htb_class *cl = (struct htb_class*)arg;
+ struct htb_class *cl = (struct htb_class *)arg;
return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}
static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
-#ifdef HTB_DEBUG
- struct htb_sched *q = qdisc_priv(sch);
-#endif
- struct htb_class *cl = htb_find(classid,sch);
- HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
- if (cl)
+ struct htb_class *cl = htb_find(classid, sch);
+ if (cl)
cl->refcnt++;
return (unsigned long)cl;
}
@@ -1431,10 +1267,9 @@ static void htb_destroy_filters(struct tcf_proto **fl)
}
}
-static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
struct htb_sched *q = qdisc_priv(sch);
- HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
if (!cl->level) {
BUG_TRAP(cl->un.leaf.q);
sch->q.qlen -= cl->un.leaf.q->q.qlen;
@@ -1442,45 +1277,45 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
}
qdisc_put_rtab(cl->rate);
qdisc_put_rtab(cl->ceil);
-
- htb_destroy_filters (&cl->filter_list);
-
- while (!list_empty(&cl->children))
- htb_destroy_class (sch,list_entry(cl->children.next,
- struct htb_class,sibling));
+
+ htb_destroy_filters(&cl->filter_list);
+
+ while (!list_empty(&cl->children))
+ htb_destroy_class(sch, list_entry(cl->children.next,
+ struct htb_class, sibling));
/* note: this delete may happen twice (see htb_delete) */
- list_del(&cl->hlist);
+ if (!hlist_unhashed(&cl->hlist))
+ hlist_del(&cl->hlist);
list_del(&cl->sibling);
-
+
if (cl->prio_activity)
- htb_deactivate (q,cl);
-
+ htb_deactivate(q, cl);
+
if (cl->cmode != HTB_CAN_SEND)
- htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
-
+ htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
kfree(cl);
}
/* always caled under BH & queue lock */
-static void htb_destroy(struct Qdisc* sch)
+static void htb_destroy(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- HTB_DBG(0,1,"htb_destroy q=%p\n",q);
- del_timer_sync (&q->timer);
+ del_timer_sync(&q->timer);
#ifdef HTB_RATECM
- del_timer_sync (&q->rttim);
+ del_timer_sync(&q->rttim);
#endif
/* This line used to be after htb_destroy_class call below
and surprisingly it worked in 2.4. But it must precede it
because filter need its target class alive to be able to call
unbind_filter on it (without Oops). */
htb_destroy_filters(&q->filter_list);
-
- while (!list_empty(&q->root))
- htb_destroy_class (sch,list_entry(q->root.next,
- struct htb_class,sibling));
+
+ while (!list_empty(&q->root))
+ htb_destroy_class(sch, list_entry(q->root.next,
+ struct htb_class, sibling));
__skb_queue_purge(&q->direct_queue);
}
@@ -1488,24 +1323,25 @@ static void htb_destroy(struct Qdisc* sch)
static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = (struct htb_class*)arg;
- HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+ struct htb_class *cl = (struct htb_class *)arg;
// TODO: why don't allow to delete subtree ? references ? does
// tc subsys quarantee us that in htb_destroy it holds no class
// refs so that we can remove children safely there ?
if (!list_empty(&cl->children) || cl->filter_cnt)
return -EBUSY;
-
+
sch_tree_lock(sch);
-
+
/* delete from hash and active; remainder in destroy_class */
- list_del_init(&cl->hlist);
+ if (!hlist_unhashed(&cl->hlist))
+ hlist_del(&cl->hlist);
+
if (cl->prio_activity)
- htb_deactivate (q,cl);
+ htb_deactivate(q, cl);
if (--cl->refcnt == 0)
- htb_destroy_class(sch,cl);
+ htb_destroy_class(sch, cl);
sch_tree_unlock(sch);
return 0;
@@ -1513,45 +1349,46 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
static void htb_put(struct Qdisc *sch, unsigned long arg)
{
-#ifdef HTB_DEBUG
- struct htb_sched *q = qdisc_priv(sch);
-#endif
- struct htb_class *cl = (struct htb_class*)arg;
- HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+ struct htb_class *cl = (struct htb_class *)arg;
if (--cl->refcnt == 0)
- htb_destroy_class(sch,cl);
+ htb_destroy_class(sch, cl);
}
-static int htb_change_class(struct Qdisc *sch, u32 classid,
- u32 parentid, struct rtattr **tca, unsigned long *arg)
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+ u32 parentid, struct rtattr **tca,
+ unsigned long *arg)
{
int err = -EINVAL;
struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = (struct htb_class*)*arg,*parent;
- struct rtattr *opt = tca[TCA_OPTIONS-1];
+ struct htb_class *cl = (struct htb_class *)*arg, *parent;
+ struct rtattr *opt = tca[TCA_OPTIONS - 1];
struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct rtattr *tb[TCA_HTB_RTAB];
struct tc_htb_opt *hopt;
/* extract all subattrs from opt attr */
if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
- tb[TCA_HTB_PARMS-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+ tb[TCA_HTB_PARMS - 1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
goto failure;
-
- parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
- hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
- HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
- rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
- ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
- if (!rtab || !ctab) goto failure;
+ parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
+
+ hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
+
+ rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
+ ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+ if (!rtab || !ctab)
+ goto failure;
- if (!cl) { /* new class */
+ if (!cl) { /* new class */
struct Qdisc *new_q;
+ int prio;
+
/* check for valid classid */
- if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+ if (!classid || TC_H_MAJ(classid ^ sch->handle)
+ || htb_find(classid, sch))
goto failure;
/* check maximal depth */
@@ -1560,18 +1397,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
goto failure;
}
err = -ENOBUFS;
- if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+ if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
goto failure;
-
- memset(cl, 0, sizeof(*cl));
+
cl->refcnt = 1;
INIT_LIST_HEAD(&cl->sibling);
- INIT_LIST_HEAD(&cl->hlist);
+ INIT_HLIST_NODE(&cl->hlist);
INIT_LIST_HEAD(&cl->children);
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-#ifdef HTB_DEBUG
- cl->magic = HTB_CMAGIC;
-#endif
+ RB_CLEAR_NODE(&cl->pq_node);
+
+ for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+ RB_CLEAR_NODE(&cl->node[prio]);
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
so that can't be used inside of sch_tree_lock
@@ -1581,53 +1418,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (parent && !parent->level) {
/* turn parent into inner node */
sch->q.qlen -= parent->un.leaf.q->q.qlen;
- qdisc_destroy (parent->un.leaf.q);
- if (parent->prio_activity)
- htb_deactivate (q,parent);
+ qdisc_destroy(parent->un.leaf.q);
+ if (parent->prio_activity)
+ htb_deactivate(q, parent);
/* remove from evt list because of level change */
if (parent->cmode != HTB_CAN_SEND) {
- htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+ htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
parent->cmode = HTB_CAN_SEND;
}
parent->level = (parent->parent ? parent->parent->level
- : TC_HTB_MAXDEPTH) - 1;
- memset (&parent->un.inner,0,sizeof(parent->un.inner));
+ : TC_HTB_MAXDEPTH) - 1;
+ memset(&parent->un.inner, 0, sizeof(parent->un.inner));
}
/* leaf (we) needs elementary qdisc */
cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
- cl->classid = classid; cl->parent = parent;
+ cl->classid = classid;
+ cl->parent = parent;
/* set class to be in HTB_CAN_SEND state */
cl->tokens = hopt->buffer;
cl->ctokens = hopt->cbuffer;
- cl->mbuffer = 60000000; /* 1min */
+ cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
PSCHED_GET_TIME(cl->t_c);
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
- list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
- list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-#ifdef HTB_DEBUG
- {
- int i;
- for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
- cl->pq_node.rb_color = -1;
- }
-#endif
- } else sch_tree_lock(sch);
+ hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+ list_add_tail(&cl->sibling,
+ parent ? &parent->children : &q->root);
+ } else
+ sch_tree_lock(sch);
/* it used to be a nasty bug here, we have to check that node
- is really leaf before changing cl->un.leaf ! */
+ is really leaf before changing cl->un.leaf ! */
if (!cl->level) {
cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
- printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
+ printk(KERN_WARNING
+ "HTB: quantum of class %X is small. Consider r2q change.\n",
+ cl->classid);
cl->un.leaf.quantum = 1000;
}
if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
- printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
+ printk(KERN_WARNING
+ "HTB: quantum of class %X is big. Consider r2q change.\n",
+ cl->classid);
cl->un.leaf.quantum = 200000;
}
if (hopt->quantum)
@@ -1638,16 +1475,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->buffer = hopt->buffer;
cl->cbuffer = hopt->cbuffer;
- if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
- if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+ if (cl->rate)
+ qdisc_put_rtab(cl->rate);
+ cl->rate = rtab;
+ if (cl->ceil)
+ qdisc_put_rtab(cl->ceil);
+ cl->ceil = ctab;
sch_tree_unlock(sch);
*arg = (unsigned long)cl;
return 0;
failure:
- if (rtab) qdisc_put_rtab(rtab);
- if (ctab) qdisc_put_rtab(ctab);
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ if (ctab)
+ qdisc_put_rtab(ctab);
return err;
}
@@ -1656,28 +1499,28 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
- HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+
return fl;
}
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
- u32 classid)
+ u32 classid)
{
struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = htb_find (classid,sch);
- HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+ struct htb_class *cl = htb_find(classid, sch);
+
/*if (cl && !cl->level) return 0;
- The line above used to be there to prevent attaching filters to
- leaves. But at least tc_index filter uses this just to get class
- for other reasons so that we have to allow for it.
- ----
- 19.6.2002 As Werner explained it is ok - bind filter is just
- another way to "lock" the class - unlike "get" this lock can
- be broken by class during destroy IIUC.
+ The line above used to be there to prevent attaching filters to
+ leaves. But at least tc_index filter uses this just to get class
+ for other reasons so that we have to allow for it.
+ ----
+ 19.6.2002 As Werner explained it is ok - bind filter is just
+ another way to "lock" the class - unlike "get" this lock can
+ be broken by class during destroy IIUC.
*/
- if (cl)
- cl->filter_cnt++;
- else
+ if (cl)
+ cl->filter_cnt++;
+ else
q->filter_cnt++;
return (unsigned long)cl;
}
@@ -1686,10 +1529,10 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
- if (cl)
- cl->filter_cnt--;
- else
+
+ if (cl)
+ cl->filter_cnt--;
+ else
q->filter_cnt--;
}
@@ -1702,9 +1545,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < HTB_HSIZE; i++) {
- struct list_head *p;
- list_for_each (p,q->hash+i) {
- struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+ struct hlist_node *p;
+ struct htb_class *cl;
+
+ hlist_for_each_entry(cl, p, q->hash + i, hlist) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -1752,12 +1596,13 @@ static struct Qdisc_ops htb_qdisc_ops = {
static int __init htb_module_init(void)
{
- return register_qdisc(&htb_qdisc_ops);
+ return register_qdisc(&htb_qdisc_ops);
}
-static void __exit htb_module_exit(void)
+static void __exit htb_module_exit(void)
{
- unregister_qdisc(&htb_qdisc_ops);
+ unregister_qdisc(&htb_qdisc_ops);
}
+
module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 8edc32a6ad2f..c3242f727d41 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -7,7 +7,6 @@
* Authors: Jamal Hadi Salim 1999
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5a4a4d0ae502..45939bafbdf8 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -13,7 +13,6 @@
* Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -149,7 +148,8 @@ static long tabledist(unsigned long mu, long sigma,
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+ /* We don't fill cb now as skb_unshare() may invalidate it */
+ struct netem_skb_cb *cb;
struct sk_buff *skb2;
int ret;
int count = 1;
@@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
if (!(skb = skb_unshare(skb, GFP_ATOMIC))
- || (skb->ip_summed == CHECKSUM_HW
- && skb_checksum_help(skb, 0))) {
+ || (skb->ip_summed == CHECKSUM_PARTIAL
+ && skb_checksum_help(skb))) {
sch->qstats.drops++;
return NET_XMIT_DROP;
}
@@ -201,6 +201,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
}
+ cb = (struct netem_skb_cb *)skb->cb;
if (q->gap == 0 /* not doing reordering */
|| q->counter < q->gap /* inside last reordering gap */
|| q->reorder < get_crandom(&q->reorder_cor)) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3395ca7bcadf..a5fa03c0c19b 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -11,7 +11,6 @@
* Init -- EINVAL when opt undefined
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 2be563cba72b..d65cadddea69 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -14,7 +14,6 @@
* J Hadi Salim 980816: ECN support
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e057768f68b4..d0d6e595a78c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -9,7 +9,6 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index d8e03c74ca76..d9a5d298d755 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -12,7 +12,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 9d05e13e92f6..27329ce9c311 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -441,7 +441,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
/* If the primary path is changing, assume that the
* user wants to use this new path.
*/
- if (transport->state != SCTP_INACTIVE)
+ if ((transport->state == SCTP_ACTIVE) ||
+ (transport->state == SCTP_UNKNOWN))
asoc->peer.active_path = transport;
/*
@@ -532,11 +533,11 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
port = addr->v4.sin_port;
SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ",
- " port: %d state:%s\n",
+ " port: %d state:%d\n",
asoc,
addr,
addr->v4.sin_port,
- peer_state == SCTP_UNKNOWN?"UNKNOWN":"ACTIVE");
+ peer_state);
/* Set the port if it has not been set yet. */
if (0 == asoc->peer.port)
@@ -545,9 +546,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Check to see if this is a duplicate. */
peer = sctp_assoc_lookup_paddr(asoc, addr);
if (peer) {
- if (peer_state == SCTP_ACTIVE &&
- peer->state == SCTP_UNKNOWN)
- peer->state = SCTP_ACTIVE;
+ if (peer->state == SCTP_UNKNOWN) {
+ if (peer_state == SCTP_ACTIVE)
+ peer->state = SCTP_ACTIVE;
+ if (peer_state == SCTP_UNCONFIRMED)
+ peer->state = SCTP_UNCONFIRMED;
+ }
return peer;
}
@@ -739,7 +743,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
list_for_each(pos, &asoc->peer.transport_addr_list) {
t = list_entry(pos, struct sctp_transport, transports);
- if (t->state == SCTP_INACTIVE)
+ if ((t->state == SCTP_INACTIVE) ||
+ (t->state == SCTP_UNCONFIRMED))
continue;
if (!first || t->last_time_heard > first->last_time_heard) {
second = first;
@@ -759,7 +764,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* [If the primary is active but not most recent, bump the most
* recently used transport.]
*/
- if (asoc->peer.primary_path->state != SCTP_INACTIVE &&
+ if (((asoc->peer.primary_path->state == SCTP_ACTIVE) ||
+ (asoc->peer.primary_path->state == SCTP_UNKNOWN)) &&
first != asoc->peer.primary_path) {
second = first;
first = asoc->peer.primary_path;
@@ -1054,7 +1060,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
transports);
if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr))
sctp_assoc_add_peer(asoc, &trans->ipaddr,
- GFP_ATOMIC, SCTP_ACTIVE);
+ GFP_ATOMIC, trans->state);
}
asoc->ctsn_ack_point = asoc->next_tsn - 1;
@@ -1094,7 +1100,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
/* Try to find an active transport. */
- if (t->state != SCTP_INACTIVE) {
+ if ((t->state == SCTP_ACTIVE) ||
+ (t->state == SCTP_UNKNOWN)) {
break;
} else {
/* Keep track of the next transport in case
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 2b962627f631..2b9c12a170e5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
/* Add an address to the bind address list in the SCTP_bind_addr structure. */
int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
- gfp_t gfp)
+ __u8 use_as_src, gfp_t gfp)
{
struct sctp_sockaddr_entry *addr;
@@ -163,6 +163,8 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
if (!addr->a.v4.sin_port)
addr->a.v4.sin_port = bp->port;
+ addr->use_as_src = use_as_src;
+
INIT_LIST_HEAD(&addr->list);
list_add_tail(&addr->list, &bp->address_list);
SCTP_DBG_OBJCNT_INC(addr);
@@ -274,7 +276,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
}
af->from_addr_param(&addr, rawaddr, port, 0);
- retval = sctp_add_bind_addr(bp, &addr, gfp);
+ retval = sctp_add_bind_addr(bp, &addr, 1, gfp);
if (retval) {
/* Can't finish building the list, clean up. */
sctp_bind_addr_clean(bp);
@@ -367,7 +369,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
(((AF_INET6 == addr->sa.sa_family) &&
(flags & SCTP_ADDR6_ALLOWED) &&
(flags & SCTP_ADDR6_PEERSUPP))))
- error = sctp_add_bind_addr(dest, addr, gfp);
+ error = sctp_add_bind_addr(dest, addr, 1, gfp);
}
return error;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 67bd53070ee0..35c49ff2d062 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -158,6 +158,12 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
void sctp_endpoint_free(struct sctp_endpoint *ep)
{
ep->base.dead = 1;
+
+ ep->base.sk->sk_state = SCTP_SS_CLOSED;
+
+ /* Unlink this endpoint, so we can't find it again! */
+ sctp_unhash_endpoint(ep);
+
sctp_endpoint_put(ep);
}
@@ -166,13 +172,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
- ep->base.sk->sk_state = SCTP_SS_CLOSED;
-
- /* Unlink this endpoint, so we can't find it again! */
- sctp_unhash_endpoint(ep);
-
/* Free up the HMAC transform. */
- sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
+ crypto_free_hash(sctp_sk(ep->base.sk)->hmac);
/* Cleanup. */
sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 42b66e74bbb5..64f630102532 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -218,17 +218,11 @@ int sctp_rcv(struct sk_buff *skb)
}
}
- /* SCTP seems to always need a timestamp right now (FIXME) */
- if (skb->tstamp.off_sec == 0) {
- __net_timestamp(skb);
- sock_enable_timestamp(sk);
- }
-
if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
goto discard_release;
nf_reset(skb);
- if (sk_filter(sk, skb, 1))
+ if (sk_filter(sk, skb))
goto discard_release;
/* Create an SCTP packet structure. */
@@ -255,10 +249,13 @@ int sctp_rcv(struct sk_buff *skb)
*/
sctp_bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
+ if (sock_owned_by_user(sk)) {
+ SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
sctp_add_backlog(sk, skb);
- else
+ } else {
+ SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
+ }
sctp_bh_unlock_sock(sk);
@@ -271,6 +268,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
discard_it:
+ SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
@@ -384,7 +382,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
* pmtu discovery on this transport.
*/
t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- t->param_flags = (t->param_flags & ~SPP_HB) |
+ t->param_flags = (t->param_flags & ~SPP_PMTUD) |
SPP_PMTUD_DISABLE;
} else {
t->pathmtu = pmtu;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf0c767d43ae..cf6deed7e849 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue)
/* Put a new packet in an SCTP inqueue.
* We assume that packet->sctp_hdr is set and in host byte order.
*/
-void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
+void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
{
/* Directly call the packet handling routine. */
@@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
* Eventually, we should clean up inqueue to not rely
* on the BH related data structures.
*/
- list_add_tail(&packet->list, &q->in_chunk_list);
+ list_add_tail(&chunk->list, &q->in_chunk_list);
q->immediate.func(q->immediate.data);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 8ef08070c8b6..249e5033c1a8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -78,7 +78,6 @@
#include <asm/uaccess.h>
-extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *);
static struct notifier_block sctp_inet6addr_notifier = {
.notifier_call = sctp_inetaddr_event,
};
@@ -290,7 +289,8 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if ((laddr->a.sa.sa_family == AF_INET6) &&
+ if ((laddr->use_as_src) &&
+ (laddr->a.sa.sa_family == AF_INET6) &&
(scope <= sctp_scope(&laddr->a))) {
bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
if (!baddr || (matchlen < bmatchlen)) {
@@ -321,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
struct inet6_ifaddr *ifp;
struct sctp_sockaddr_entry *addr;
- read_lock(&addrconf_lock);
+ rcu_read_lock();
if ((in6_dev = __in6_dev_get(dev)) == NULL) {
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
return;
}
@@ -342,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
}
read_unlock(&in6_dev->lock);
- read_unlock(&addrconf_lock);
+ rcu_read_unlock();
}
/* Initialize a sockaddr_storage from in incoming skb. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cdc5a3936766..3ef4351dd956 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -633,7 +633,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
* data will fit or delay in hopes of bundling a full
* sized packet.
*/
- if (len < asoc->pathmtu - packet->overhead) {
+ if (len < asoc->frag_point) {
retval = SCTP_XMIT_NAGLE_DELAY;
goto finish;
}
@@ -645,7 +645,13 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
/* Keep track of how many bytes are in flight to the receiver. */
asoc->outqueue.outstanding_bytes += datasize;
- /* Update our view of the receiver's rwnd. */
+ /* Update our view of the receiver's rwnd. Include sk_buff overhead
+ * while updating peer.rwnd so that it reduces the chances of a
+ * receiver running out of receive buffer space even when receive
+ * window is still open. This can happen when a sender is sending
+ * sending small messages.
+ */
+ datasize += sizeof(struct sk_buff);
if (datasize < rwnd)
rwnd -= datasize;
else
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e5faa351aaad..739582415bf6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -416,7 +416,8 @@ void sctp_retransmit_mark(struct sctp_outq *q,
* (Section 7.2.4)), add the data size of those
* chunks to the rwnd.
*/
- q->asoc->peer.rwnd += sctp_data_size(chunk);
+ q->asoc->peer.rwnd += (sctp_data_size(chunk) +
+ sizeof(struct sk_buff));
q->outstanding_bytes -= sctp_data_size(chunk);
transport->flight_size -= sctp_data_size(chunk);
@@ -467,6 +468,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
switch(reason) {
case SCTP_RTXR_T3_RTX:
+ SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
/* Update the retran path if the T3-rtx timer has expired for
* the current retran path.
@@ -475,12 +477,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
sctp_assoc_update_retran_path(transport->asoc);
break;
case SCTP_RTXR_FAST_RTX:
+ SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
fast_retransmit = 1;
break;
case SCTP_RTXR_PMTUD:
- default:
+ SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
break;
+ default:
+ BUG();
}
sctp_retransmit_mark(q, transport, fast_retransmit);
@@ -691,7 +696,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
if (!new_transport) {
new_transport = asoc->peer.active_path;
- } else if (new_transport->state == SCTP_INACTIVE) {
+ } else if ((new_transport->state == SCTP_INACTIVE) ||
+ (new_transport->state == SCTP_UNCONFIRMED)) {
/* If the chunk is Heartbeat or Heartbeat Ack,
* send it to chunk->transport, even if it's
* inactive.
@@ -848,7 +854,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
*/
new_transport = chunk->transport;
if (!new_transport ||
- new_transport->state == SCTP_INACTIVE)
+ ((new_transport->state == SCTP_INACTIVE) ||
+ (new_transport->state == SCTP_UNCONFIRMED)))
new_transport = asoc->peer.active_path;
/* Change packets if necessary. */
@@ -1464,7 +1471,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
/* Mark the destination transport address as
* active if it is not so marked.
*/
- if (transport->state == SCTP_INACTIVE) {
+ if ((transport->state == SCTP_INACTIVE) ||
+ (transport->state == SCTP_UNCONFIRMED)) {
sctp_assoc_control_transport(
transport->asoc,
transport,
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 5b3b0e0ae7e5..a356d8d310a9 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = {
SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+ SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS),
+ SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS),
+ SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS),
+ SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS),
+ SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS),
+ SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS),
+ SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ),
+ SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG),
+ SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS),
+ SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS),
SNMP_MIB_SENTINEL
};
@@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
"%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
assoc, sk, sctp_sk(sk)->type, sk->sk_state,
assoc->state, hash, assoc->assoc_id,
- (sk->sk_rcvbuf - assoc->rwnd),
assoc->sndbuf_used,
+ (sk->sk_rcvbuf - assoc->rwnd),
sock_i_uid(sk), sock_i_ino(sk),
epb->bind_addr.port,
assoc->peer.port);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 816c033d7886..fac7674438a4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -61,7 +61,7 @@
#include <net/inet_ecn.h>
/* Global data structures. */
-struct sctp_globals sctp_globals;
+struct sctp_globals sctp_globals __read_mostly;
struct proc_dir_entry *proc_net_sctp;
DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
@@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific;
kmem_cache_t *sctp_chunk_cachep __read_mostly;
kmem_cache_t *sctp_bucket_cachep __read_mostly;
-extern int sctp_snmp_proc_init(void);
-extern int sctp_snmp_proc_exit(void);
-extern int sctp_eps_proc_init(void);
-extern int sctp_eps_proc_exit(void);
-extern int sctp_assocs_proc_init(void);
-extern int sctp_assocs_proc_exit(void);
-
/* Return the address of the control sock. */
struct sock *sctp_get_ctl_sock(void)
{
@@ -240,7 +233,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
(((AF_INET6 == addr->a.sa.sa_family) &&
(copy_flags & SCTP_ADDR6_ALLOWED) &&
(copy_flags & SCTP_ADDR6_PEERSUPP)))) {
- error = sctp_add_bind_addr(bp, &addr->a,
+ error = sctp_add_bind_addr(bp, &addr->a, 1,
GFP_ATOMIC);
if (error)
goto end_copy;
@@ -486,6 +479,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sctp_sockaddr_entry,
list);
+ if (!laddr->use_as_src)
+ continue;
sctp_v4_dst_saddr(&dst_saddr, dst, bp->port);
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
@@ -506,7 +501,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (AF_INET == laddr->a.sa.sa_family) {
+ if ((laddr->use_as_src) &&
+ (AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
if (!ip_route_output_key(&rt, &fl)) {
dst = &rt->u.dst;
@@ -1046,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void)
sctp_rto_beta = SCTP_RTO_BETA;
/* Valid.Cookie.Life - 60 seconds */
- sctp_valid_cookie_life = 60 * HZ;
+ sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
/* Whether Cookie Preservative is enabled(1) or not(0) */
sctp_cookie_preserve_enable = 1;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 2a8773691695..507dff72c585 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -806,38 +806,26 @@ no_mem:
/* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */
struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const struct msghdr *msg)
+ const struct msghdr *msg,
+ size_t paylen)
{
struct sctp_chunk *retval;
- void *payload = NULL, *payoff;
- size_t paylen = 0;
- struct iovec *iov = NULL;
- int iovlen = 0;
-
- if (msg) {
- iov = msg->msg_iov;
- iovlen = msg->msg_iovlen;
- paylen = get_user_iov_size(iov, iovlen);
- }
+ void *payload = NULL;
+ int err;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen);
+ retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
if (!retval)
goto err_chunk;
if (paylen) {
/* Put the msg_iov together into payload. */
- payload = kmalloc(paylen, GFP_ATOMIC);
+ payload = kmalloc(paylen, GFP_KERNEL);
if (!payload)
goto err_payload;
- payoff = payload;
- for (; iovlen > 0; --iovlen) {
- if (copy_from_user(payoff, iov->iov_base,iov->iov_len))
- goto err_copy;
- payoff += iov->iov_len;
- iov++;
- }
+ err = memcpy_fromiovec(payload, msg->msg_iov, paylen);
+ if (err < 0)
+ goto err_copy;
}
sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen);
@@ -1294,10 +1282,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
retval = kmalloc(*cookie_len, GFP_ATOMIC);
- if (!retval) {
- *cookie_len = 0;
+ if (!retval)
goto nodata;
- }
/* Clear this memory since we are sending this data structure
* out on the network.
@@ -1333,19 +1319,29 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
if (sctp_sk(ep->base.sk)->hmac) {
+ struct hash_desc desc;
+
/* Sign the message. */
sg.page = virt_to_page(&cookie->c);
sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE;
sg.length = bodysize;
keylen = SCTP_SECRET_SIZE;
key = (char *)ep->secret_key[ep->current_key];
+ desc.tfm = sctp_sk(ep->base.sk)->hmac;
+ desc.flags = 0;
- sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
- &sg, 1, cookie->signature);
+ if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+ crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
+ goto free_cookie;
}
-nodata:
return retval;
+
+free_cookie:
+ kfree(retval);
+nodata:
+ *cookie_len = 0;
+ return NULL;
}
/* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */
@@ -1366,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie(
sctp_scope_t scope;
struct sk_buff *skb = chunk->skb;
struct timeval tv;
+ struct hash_desc desc;
/* Header size is static data prior to the actual cookie, including
* any padding.
@@ -1401,17 +1398,25 @@ struct sctp_association *sctp_unpack_cookie(
sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE;
sg.length = bodysize;
key = (char *)ep->secret_key[ep->current_key];
+ desc.tfm = sctp_sk(ep->base.sk)->hmac;
+ desc.flags = 0;
memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
- sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg,
- 1, digest);
+ if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+ crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+ *error = -SCTP_IERROR_NOMEM;
+ goto fail;
+ }
if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
/* Try the previous key. */
key = (char *)ep->secret_key[ep->last_key];
memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
- sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
- &sg, 1, digest);
+ if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+ crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+ *error = -SCTP_IERROR_NOMEM;
+ goto fail;
+ }
if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
/* Yikes! Still bad signature! */
@@ -1442,8 +1447,16 @@ no_hmac:
/* Check to see if the cookie is stale. If there is already
* an association, there is no need to check cookie's expiration
* for init collision case of lost COOKIE ACK.
+ * If skb has been timestamped, then use the stamp, otherwise
+ * use current time. This introduces a small possibility that
+ * that a cookie may be considered expired, but his would only slow
+ * down the new association establishment instead of every packet.
*/
- skb_get_timestamp(skb, &tv);
+ if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
+ skb_get_timestamp(skb, &tv);
+ else
+ do_gettimeofday(&tv);
+
if (!asoc && tv_lt(bear_cookie->expiration, tv)) {
__u16 len;
/*
@@ -1493,7 +1506,7 @@ no_hmac:
/* Also, add the destination address. */
if (list_empty(&retval->base.bind_addr.address_list)) {
- sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
+ sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
GFP_ATOMIC);
}
@@ -2017,7 +2030,7 @@ static int sctp_process_param(struct sctp_association *asoc,
af->from_addr_param(&addr, param.addr, asoc->peer.port, 0);
scope = sctp_scope(peer_addr);
if (sctp_in_scope(&addr, scope))
- if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE))
+ if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
return 0;
break;
@@ -2418,7 +2431,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
* Due to Resource Shortage'.
*/
- peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE);
+ peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_UNCONFIRMED);
if (!peer)
return SCTP_ERROR_RSRC_LOW;
@@ -2565,6 +2578,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
union sctp_addr_param *addr_param;
struct list_head *pos;
struct sctp_transport *transport;
+ struct sctp_sockaddr_entry *saddr;
int retval = 0;
addr_param = (union sctp_addr_param *)
@@ -2578,7 +2592,11 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
case SCTP_PARAM_ADD_IP:
sctp_local_bh_disable();
sctp_write_lock(&asoc->base.addr_lock);
- retval = sctp_add_bind_addr(bp, &addr, GFP_ATOMIC);
+ list_for_each(pos, &bp->address_list) {
+ saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ if (sctp_cmp_addr_exact(&saddr->a, &addr))
+ saddr->use_as_src = 1;
+ }
sctp_write_unlock(&asoc->base.addr_lock);
sctp_local_bh_enable();
break;
@@ -2591,6 +2609,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
list_for_each(pos, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport,
transports);
+ dst_release(transport->dst);
sctp_transport_route(transport, NULL,
sctp_sk(asoc->base.sk));
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index c5beb2ad7ef7..9c10bdec1afe 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -430,7 +430,11 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
/* The check for association's overall error counter exceeding the
* threshold is done in the state function.
*/
- asoc->overall_error_count++;
+ /* When probing UNCONFIRMED addresses, the association overall
+ * error count is NOT incremented
+ */
+ if (transport->state != SCTP_UNCONFIRMED)
+ asoc->overall_error_count++;
if (transport->state != SCTP_INACTIVE &&
(transport->error_count++ >= transport->pathmaxrxt)) {
@@ -610,7 +614,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Mark the destination transport address as active if it is not so
* marked.
*/
- if (t->state == SCTP_INACTIVE)
+ if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED))
sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
SCTP_HEARTBEAT_SUCCESS);
@@ -620,6 +624,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
*/
hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
+
+ /* Update the heartbeat timer. */
+ if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
+ sctp_transport_hold(t);
}
/* Helper function to do a transport reset at the expiry of the hearbeat
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 9e58144f4851..1c42fe983a5b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
*/
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
0, 0, 0, GFP_ATOMIC);
- if (!ev)
- goto nomem;
-
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+ if (ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
/* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint
* will verify that it is in SHUTDOWN-ACK-SENT state, if it is
@@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
return SCTP_DISPOSITION_DELETE_TCB;
-
-nomem:
- return SCTP_DISPOSITION_NOMEM;
}
/*
@@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
GFP_ATOMIC))
goto nomem_init;
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
-
/* B) "Z" shall respond immediately with an INIT ACK chunk. */
/* If there are errors need to be reported for unknown parameters,
@@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
sizeof(sctp_chunkhdr_t);
if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
- goto nomem_ack;
+ goto nomem_init;
repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
if (!repl)
- goto nomem_ack;
+ goto nomem_init;
/* If there are errors need to be reported for unknown parameters,
* include them in the outgoing INIT ACK as "Unrecognized parameter"
@@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
sctp_chunk_free(err_chunk);
}
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
/*
@@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
return SCTP_DISPOSITION_DELETE_TCB;
-nomem_ack:
- if (err_chunk)
- sctp_chunk_free(err_chunk);
nomem_init:
sctp_association_free(new_asoc);
nomem:
+ if (err_chunk)
+ sctp_chunk_free(err_chunk);
return SCTP_DISPOSITION_NOMEM;
}
@@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
struct sctp_association *new_asoc;
sctp_init_chunk_t *peer_init;
struct sctp_chunk *repl;
- struct sctp_ulpevent *ev;
+ struct sctp_ulpevent *ev, *ai_ev = NULL;
int error = 0;
struct sctp_chunk *err_chk_p;
@@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
};
}
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
- SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
- sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
-
- if (new_asoc->autoclose)
- sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
- SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
-
- sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
- /* Re-build the bind address for the association is done in
+ /* Delay state machine commands until later.
+ *
+ * Re-build the bind address for the association is done in
* the sctp_unpack_cookie() already.
*/
/* This is a brand-new association, so these are not yet side
@@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
repl = sctp_make_cookie_ack(new_asoc, chunk);
if (!repl)
- goto nomem_repl;
-
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ goto nomem_init;
/* RFC 2960 5.1 Normal Establishment of an Association
*
@@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
if (!ev)
goto nomem_ev;
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
-
/* Sockets API Draft Section 5.3.1.6
* When a peer sends a Adaption Layer Indication parameter , SCTP
* delivers this notification to inform the application that of the
* peers requested adaption layer.
*/
if (new_asoc->peer.adaption_ind) {
- ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+ ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc,
GFP_ATOMIC);
- if (!ev)
- goto nomem_ev;
+ if (!ai_ev)
+ goto nomem_aiev;
+ }
+
+ /* Add all the state machine commands now since we've created
+ * everything. This way we don't introduce memory corruptions
+ * during side-effect processing and correclty count established
+ * associations.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+ SCTP_STATE(SCTP_STATE_ESTABLISHED));
+ SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+ sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+
+ if (new_asoc->autoclose)
+ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+ SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+ sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+ /* This will send the COOKIE ACK */
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+ /* Queue the ASSOC_CHANGE event */
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+ /* Send up the Adaptation Layer Indication event */
+ if (ai_ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
- SCTP_ULPEVENT(ev));
- }
+ SCTP_ULPEVENT(ai_ev));
return SCTP_DISPOSITION_CONSUME;
+nomem_aiev:
+ sctp_ulpevent_free(ev);
nomem_ev:
sctp_chunk_free(repl);
-nomem_repl:
nomem_init:
sctp_association_free(new_asoc);
nomem:
@@ -846,6 +854,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
+ hbinfo.hb_nonce = transport->hb_nonce;
/* Send a heartbeat to our peer. */
paylen = sizeof(sctp_sender_hb_info_t);
@@ -1048,6 +1057,10 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
return SCTP_DISPOSITION_DISCARD;
}
+ /* Validate the 64-bit random nonce. */
+ if (hbinfo->hb_nonce != link->hb_nonce)
+ return SCTP_DISPOSITION_DISCARD;
+
max_interval = link->hbinterval + link->rto;
/* Check if the timestamp looks valid. */
@@ -1355,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
sctp_source(chunk),
(sctp_init_chunk_t *)chunk->chunk_hdr,
- GFP_ATOMIC)) {
- retval = SCTP_DISPOSITION_NOMEM;
- goto nomem_init;
- }
+ GFP_ATOMIC))
+ goto nomem;
/* Make sure no new addresses are being added during the
* restart. Do not do this check for COOKIE-WAIT state,
@@ -1369,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk,
commands)) {
retval = SCTP_DISPOSITION_CONSUME;
- goto cleanup_asoc;
+ goto nomem_retval;
}
}
@@ -1425,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
retval = SCTP_DISPOSITION_CONSUME;
+ return retval;
+
+nomem:
+ retval = SCTP_DISPOSITION_NOMEM;
+nomem_retval:
+ if (new_asoc)
+ sctp_association_free(new_asoc);
cleanup:
if (err_chunk)
sctp_chunk_free(err_chunk);
return retval;
-nomem:
- retval = SCTP_DISPOSITION_NOMEM;
- goto cleanup;
-nomem_init:
-cleanup_asoc:
- sctp_association_free(new_asoc);
- goto cleanup;
}
/*
@@ -1606,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
*/
sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
-
repl = sctp_make_cookie_ack(new_asoc, chunk);
if (!repl)
goto nomem;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
-
/* Report association restart to upper layer. */
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
new_asoc->c.sinit_num_ostreams,
@@ -1623,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
if (!ev)
goto nomem_ev;
+ /* Update the content of current association. */
+ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
return SCTP_DISPOSITION_CONSUME;
@@ -1746,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
sctp_cmd_seq_t *commands,
struct sctp_association *new_asoc)
{
- struct sctp_ulpevent *ev = NULL;
+ struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
struct sctp_chunk *repl;
/* Clarification from Implementor's Guide:
@@ -1773,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
* SCTP user upon reception of a valid COOKIE
* ECHO chunk.
*/
- ev = sctp_ulpevent_make_assoc_change(new_asoc, 0,
+ ev = sctp_ulpevent_make_assoc_change(asoc, 0,
SCTP_COMM_UP, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
GFP_ATOMIC);
if (!ev)
goto nomem;
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
- SCTP_ULPEVENT(ev));
/* Sockets API Draft Section 5.3.1.6
* When a peer sends a Adaption Layer Indication parameter,
* SCTP delivers this notification to inform the application
* that of the peers requested adaption layer.
*/
- if (new_asoc->peer.adaption_ind) {
- ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+ if (asoc->peer.adaption_ind) {
+ ai_ev = sctp_ulpevent_make_adaption_indication(asoc,
GFP_ATOMIC);
- if (!ev)
+ if (!ai_ev)
goto nomem;
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
- SCTP_ULPEVENT(ev));
}
}
sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
@@ -1804,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
if (!repl)
goto nomem;
+ if (ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+ if (ai_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ai_ev));
+
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
nomem:
+ if (ai_ev)
+ sctp_ulpevent_free(ai_ev);
if (ev)
sctp_ulpevent_free(ev);
return SCTP_DISPOSITION_NOMEM;
@@ -2658,9 +2672,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
break;
case SCTP_IERROR_HIGH_TSN:
case SCTP_IERROR_BAD_STREAM:
+ SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_noforce;
case SCTP_IERROR_DUP_TSN:
case SCTP_IERROR_IGNORE_TSN:
+ SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_force;
case SCTP_IERROR_NO_DATA:
goto consume;
@@ -3012,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
return sctp_sf_violation_chunklen(ep, asoc, type, arg,
commands);
-
/* 10.2 H) SHUTDOWN COMPLETE notification
*
* When SCTP completes the shutdown procedures (section 9.2) this
@@ -3023,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
if (!ev)
goto nomem;
+ /* ...send a SHUTDOWN COMPLETE chunk to its peer, */
+ reply = sctp_make_shutdown_complete(asoc, chunk);
+ if (!reply)
+ goto nomem_chunk;
+
+ /* Do all the commands now (after allocation), so that we
+ * have consistent state if memory allocation failes
+ */
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
/* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
@@ -3034,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- /* ...send a SHUTDOWN COMPLETE chunk to its peer, */
- reply = sctp_make_shutdown_complete(asoc, chunk);
- if (!reply)
- goto nomem;
-
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
@@ -3049,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
return SCTP_DISPOSITION_DELETE_TCB;
+nomem_chunk:
+ sctp_ulpevent_free(ev);
nomem:
return SCTP_DISPOSITION_NOMEM;
}
@@ -3647,6 +3667,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
void *arg,
sctp_cmd_seq_t *commands)
{
+ SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
@@ -4026,18 +4047,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
* from its upper layer, but retransmits data to the far end
* if necessary to fill gaps.
*/
- struct msghdr *msg = arg;
- struct sctp_chunk *abort;
+ struct sctp_chunk *abort = arg;
sctp_disposition_t retval;
retval = SCTP_DISPOSITION_CONSUME;
- /* Generate ABORT chunk to send the peer. */
- abort = sctp_make_abort_user(asoc, NULL, msg);
- if (!abort)
- retval = SCTP_DISPOSITION_NOMEM;
- else
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4161,8 +4176,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
void *arg,
sctp_cmd_seq_t *commands)
{
- struct msghdr *msg = arg;
- struct sctp_chunk *abort;
+ struct sctp_chunk *abort = arg;
sctp_disposition_t retval;
/* Stop T1-init timer */
@@ -4170,12 +4184,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
retval = SCTP_DISPOSITION_CONSUME;
- /* Generate ABORT chunk to send the peer */
- abort = sctp_make_abort_user(asoc, NULL, msg);
- if (!abort)
- retval = SCTP_DISPOSITION_NOMEM;
- else
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
@@ -4555,6 +4564,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
{
struct sctp_transport *transport = arg;
+ SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+
if (asoc->overall_error_count >= asoc->max_retrans) {
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
@@ -4623,6 +4634,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
void *arg,
sctp_cmd_seq_t *commands)
{
+ SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
return SCTP_DISPOSITION_CONSUME;
}
@@ -4657,6 +4669,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
+ SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -4716,6 +4729,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
+ SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
repl = sctp_make_cookie_echo(asoc, NULL);
@@ -4760,6 +4774,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
+ SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+
if (asoc->overall_error_count >= asoc->max_retrans) {
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
@@ -4821,6 +4837,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
struct sctp_chunk *chunk = asoc->addip_last_asconf;
struct sctp_transport *transport = chunk->transport;
+ SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+
/* ADDIP 4.1 B1) Increment the error counters and perform path failure
* detection on the appropriate destination address as defined in
* RFC2960 [5] section 8.1 and 8.2.
@@ -4887,6 +4905,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
+ SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
reply = sctp_make_abort(asoc, NULL, 0);
if (!reply)
@@ -4917,6 +4936,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
{
int disposition;
+ SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+
/* From 9.2 Shutdown of an Association
* Upon receipt of the SHUTDOWN primitive from its upper
* layer, the endpoint enters SHUTDOWN-PENDING state and
@@ -5278,7 +5299,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
datalen -= sizeof(sctp_data_chunk_t);
deliver = SCTP_CMD_CHUNK_ULP;
- chunk->data_accepted = 1;
/* Think about partial delivery. */
if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) {
@@ -5357,6 +5377,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
if (SCTP_CMD_CHUNK_ULP == deliver)
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
+ chunk->data_accepted = 1;
+
/* Note: Some chunks may get overcounted (if we drop) or overcounted
* if we renege and the chunk arrives again.
*/
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b811691c35bf..3fe906d65069 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,7 +57,6 @@
* be incorporated into the next SCTP release.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/wait.h>
@@ -370,7 +369,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
/* Use GFP_ATOMIC since BHs are disabled. */
addr->v4.sin_port = ntohs(addr->v4.sin_port);
- ret = sctp_add_bind_addr(bp, addr, GFP_ATOMIC);
+ ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
addr->v4.sin_port = htons(addr->v4.sin_port);
sctp_write_unlock(&ep->base.addr_lock);
sctp_local_bh_enable();
@@ -492,6 +491,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
struct sctp_chunk *chunk;
struct sctp_sockaddr_entry *laddr;
union sctp_addr *addr;
+ union sctp_addr saveaddr;
void *addr_buf;
struct sctp_af *af;
struct list_head *pos;
@@ -559,14 +559,26 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
}
retval = sctp_send_asconf(asoc, chunk);
+ if (retval)
+ goto out;
- /* FIXME: After sending the add address ASCONF chunk, we
- * cannot append the address to the association's binding
- * address list, because the new address may be used as the
- * source of a message sent to the peer before the ASCONF
- * chunk is received by the peer. So we should wait until
- * ASCONF_ACK is received.
+ /* Add the new addresses to the bind address list with
+ * use_as_src set to 0.
*/
+ sctp_local_bh_disable();
+ sctp_write_lock(&asoc->base.addr_lock);
+ addr_buf = addrs;
+ for (i = 0; i < addrcnt; i++) {
+ addr = (union sctp_addr *)addr_buf;
+ af = sctp_get_af_specific(addr->v4.sin_family);
+ memcpy(&saveaddr, addr, af->sockaddr_len);
+ saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
+ retval = sctp_add_bind_addr(bp, &saveaddr, 0,
+ GFP_ATOMIC);
+ addr_buf += af->sockaddr_len;
+ }
+ sctp_write_unlock(&asoc->base.addr_lock);
+ sctp_local_bh_enable();
}
out:
@@ -677,12 +689,15 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
+ struct sctp_transport *transport;
struct sctp_bind_addr *bp;
struct sctp_chunk *chunk;
union sctp_addr *laddr;
+ union sctp_addr saveaddr;
void *addr_buf;
struct sctp_af *af;
- struct list_head *pos;
+ struct list_head *pos, *pos1;
+ struct sctp_sockaddr_entry *saddr;
int i;
int retval = 0;
@@ -749,14 +764,42 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
goto out;
}
- retval = sctp_send_asconf(asoc, chunk);
+ /* Reset use_as_src flag for the addresses in the bind address
+ * list that are to be deleted.
+ */
+ sctp_local_bh_disable();
+ sctp_write_lock(&asoc->base.addr_lock);
+ addr_buf = addrs;
+ for (i = 0; i < addrcnt; i++) {
+ laddr = (union sctp_addr *)addr_buf;
+ af = sctp_get_af_specific(laddr->v4.sin_family);
+ memcpy(&saveaddr, laddr, af->sockaddr_len);
+ saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
+ list_for_each(pos1, &bp->address_list) {
+ saddr = list_entry(pos1,
+ struct sctp_sockaddr_entry,
+ list);
+ if (sctp_cmp_addr_exact(&saddr->a, &saveaddr))
+ saddr->use_as_src = 0;
+ }
+ addr_buf += af->sockaddr_len;
+ }
+ sctp_write_unlock(&asoc->base.addr_lock);
+ sctp_local_bh_enable();
- /* FIXME: After sending the delete address ASCONF chunk, we
- * cannot remove the addresses from the association's bind
- * address list, because there maybe some packet send to
- * the delete addresses, so we should wait until ASCONF_ACK
- * packet is received.
+ /* Update the route and saddr entries for all the transports
+ * as some of the addresses in the bind address list are
+ * about to be deleted and cannot be used as source addresses.
*/
+ list_for_each(pos1, &asoc->peer.transport_addr_list) {
+ transport = list_entry(pos1, struct sctp_transport,
+ transports);
+ dst_release(transport->dst);
+ sctp_transport_route(transport, NULL,
+ sctp_sk(asoc->base.sk));
+ }
+
+ retval = sctp_send_asconf(asoc, chunk);
}
out:
return retval;
@@ -1246,9 +1289,13 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
}
}
- if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)
- sctp_primitive_ABORT(asoc, NULL);
- else
+ if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+ struct sctp_chunk *chunk;
+
+ chunk = sctp_make_abort_user(asoc, NULL, 0);
+ if (chunk)
+ sctp_primitive_ABORT(asoc, chunk);
+ } else
sctp_primitive_SHUTDOWN(asoc, NULL);
}
@@ -1477,8 +1524,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_unlock;
}
if (sinfo_flags & SCTP_ABORT) {
+ struct sctp_chunk *chunk;
+
+ chunk = sctp_make_abort_user(asoc, msg, msg_len);
+ if (!chunk) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
- sctp_primitive_ABORT(asoc, msg);
+ sctp_primitive_ABORT(asoc, chunk);
err = 0;
goto out_unlock;
}
@@ -2026,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
* SPP_SACKDELAY_ENABLE, setting both will have undefined
* results.
*/
-int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
- struct sctp_transport *trans,
- struct sctp_association *asoc,
- struct sctp_sock *sp,
- int hb_change,
- int pmtud_change,
- int sackdelay_change)
+static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
+ struct sctp_transport *trans,
+ struct sctp_association *asoc,
+ struct sctp_sock *sp,
+ int hb_change,
+ int pmtud_change,
+ int sackdelay_change)
{
int error;
@@ -2915,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err)
goto out;
}
- timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
error = sctp_wait_for_accept(sk, timeo);
if (error)
@@ -2990,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->initmsg.sinit_num_ostreams = sctp_max_outstreams;
sp->initmsg.sinit_max_instreams = sctp_max_instreams;
sp->initmsg.sinit_max_attempts = sctp_max_retrans_init;
- sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max);
+ sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
/* Initialize default RTO related parameters. These parameters can
* be modified for with the SCTP_RTOINFO socket option.
*/
- sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial);
- sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max);
- sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min);
+ sp->rtoinfo.srto_initial = sctp_rto_initial;
+ sp->rtoinfo.srto_max = sctp_rto_max;
+ sp->rtoinfo.srto_min = sctp_rto_min;
/* Initialize default association related parameters. These parameters
* can be modified with the SCTP_ASSOCINFO socket option.
@@ -3006,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->assocparams.sasoc_number_peer_destinations = 0;
sp->assocparams.sasoc_peer_rwnd = 0;
sp->assocparams.sasoc_local_rwnd = 0;
- sp->assocparams.sasoc_cookie_life =
- jiffies_to_msecs(sctp_valid_cookie_life);
+ sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
/* Initialize default event subscriptions. By default, all the
* options are off.
@@ -3017,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
/* Default Peer Address Parameters. These defaults can
* be modified via SCTP_PEER_ADDR_PARAMS
*/
- sp->hbinterval = jiffies_to_msecs(sctp_hb_interval);
+ sp->hbinterval = sctp_hb_interval;
sp->pathmaxrxt = sctp_max_retrans_path;
sp->pathmtu = 0; // allow default discovery
- sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout);
+ sp->sackdelay = sctp_sack_timeout;
sp->param_flags = SPP_HB_ENABLE |
SPP_PMTUD_ENABLE |
SPP_SACKDELAY_ENABLE;
@@ -3030,8 +3084,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
*/
sp->disable_fragments = 0;
- /* Turn on/off any Nagle-like algorithm. */
- sp->nodelay = 1;
+ /* Enable Nagle algorithm by default. */
+ sp->nodelay = 0;
/* Enable by default. */
sp->v4mapped = 1;
@@ -4843,7 +4897,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
int sctp_inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
- struct crypto_tfm *tfm=NULL;
+ struct crypto_hash *tfm = NULL;
int err = -EINVAL;
if (unlikely(backlog < 0))
@@ -4856,7 +4910,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
/* Allocate HMAC for generating cookie. */
if (sctp_hmac_alg) {
- tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0);
+ tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
if (!tfm) {
err = -ENOSYS;
goto out;
@@ -4882,7 +4936,7 @@ out:
sctp_release_sock(sk);
return err;
cleanup:
- sctp_crypto_free_tfm(tfm);
+ crypto_free_hash(tfm);
goto out;
}
@@ -4978,7 +5032,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
/* Caller must hold hashbucket lock for this tb with local BH disabled */
static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
{
- if (hlist_empty(&pp->owner)) {
+ if (pp && hlist_empty(&pp->owner)) {
if (pp->next)
pp->next->pprev = pp->pprev;
*(pp->pprev) = pp->next;
@@ -5564,6 +5618,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
/* Copy the bind_addr list from the original endpoint to the new
* endpoint so that we can handle restarts properly
*/
+ if (PF_INET6 == assoc->base.sk->sk_family)
+ flags = SCTP_ADDR6_ALLOWED;
if (assoc->peer.ipv4_address)
flags |= SCTP_ADDR4_PEERSUPP;
if (assoc->peer.ipv6_address)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dc6f3ff32358..633cd178654b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -45,9 +45,10 @@
#include <net/sctp/sctp.h>
#include <linux/sysctl.h>
-static ctl_handler sctp_sysctl_jiffies_ms;
-static long rto_timer_min = 1;
-static long rto_timer_max = 86400000; /* One day */
+static int zero = 0;
+static int one = 1;
+static int timer_max = 86400000; /* ms in one day */
+static int int_max = INT_MAX;
static long sack_timer_min = 1;
static long sack_timer_max = 500;
@@ -56,45 +57,45 @@ static ctl_table sctp_table[] = {
.ctl_name = NET_SCTP_RTO_INITIAL,
.procname = "rto_initial",
.data = &sctp_rto_initial,
- .maxlen = sizeof(long),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
- .extra1 = &rto_timer_min,
- .extra2 = &rto_timer_max
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
.ctl_name = NET_SCTP_RTO_MIN,
.procname = "rto_min",
.data = &sctp_rto_min,
- .maxlen = sizeof(long),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
- .extra1 = &rto_timer_min,
- .extra2 = &rto_timer_max
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
.ctl_name = NET_SCTP_RTO_MAX,
.procname = "rto_max",
.data = &sctp_rto_max,
- .maxlen = sizeof(long),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
- .extra1 = &rto_timer_min,
- .extra2 = &rto_timer_max
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
.ctl_name = NET_SCTP_VALID_COOKIE_LIFE,
.procname = "valid_cookie_life",
.data = &sctp_valid_cookie_life,
- .maxlen = sizeof(long),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
- .extra1 = &rto_timer_min,
- .extra2 = &rto_timer_max
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
.ctl_name = NET_SCTP_MAX_BURST,
@@ -102,7 +103,10 @@ static ctl_table sctp_table[] = {
.data = &sctp_max_burst,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &int_max
},
{
.ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS,
@@ -110,7 +114,10 @@ static ctl_table sctp_table[] = {
.data = &sctp_max_retrans_association,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &int_max
},
{
.ctl_name = NET_SCTP_SNDBUF_POLICY,
@@ -118,7 +125,8 @@ static ctl_table sctp_table[] = {
.data = &sctp_sndbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_RCVBUF_POLICY,
@@ -126,7 +134,8 @@ static ctl_table sctp_table[] = {
.data = &sctp_rcvbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_PATH_MAX_RETRANS,
@@ -134,7 +143,10 @@ static ctl_table sctp_table[] = {
.data = &sctp_max_retrans_path,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &int_max
},
{
.ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS,
@@ -142,18 +154,21 @@ static ctl_table sctp_table[] = {
.data = &sctp_max_retrans_init,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &int_max
},
{
.ctl_name = NET_SCTP_HB_INTERVAL,
.procname = "hb_interval",
.data = &sctp_hb_interval,
- .maxlen = sizeof(long),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
- .extra1 = &rto_timer_min,
- .extra2 = &rto_timer_max
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
.ctl_name = NET_SCTP_PRESERVE_ENABLE,
@@ -161,23 +176,26 @@ static ctl_table sctp_table[] = {
.data = &sctp_cookie_preserve_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_RTO_ALPHA,
.procname = "rto_alpha_exp_divisor",
.data = &sctp_rto_alpha,
.maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_RTO_BETA,
.procname = "rto_beta_exp_divisor",
.data = &sctp_rto_beta,
.maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_ADDIP_ENABLE,
@@ -185,7 +203,8 @@ static ctl_table sctp_table[] = {
.data = &sctp_addip_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_PRSCTP_ENABLE,
@@ -193,7 +212,8 @@ static ctl_table sctp_table[] = {
.data = &sctp_prsctp_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec
},
{
.ctl_name = NET_SCTP_SACK_TIMEOUT,
@@ -201,8 +221,8 @@ static ctl_table sctp_table[] = {
.data = &sctp_sack_timeout,
.maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
- .strategy = &sctp_sysctl_jiffies_ms,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
.extra1 = &sack_timer_min,
.extra2 = &sack_timer_max,
},
@@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void)
{
unregister_sysctl_table(sctp_sysctl_header);
}
-
-/* Strategy function to convert jiffies to milliseconds. */
-static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen, void **context) {
-
- if (oldval) {
- size_t olen;
-
- if (oldlenp) {
- if (get_user(olen, oldlenp))
- return -EFAULT;
-
- if (olen != sizeof (int))
- return -EINVAL;
- }
- if (put_user((*(int *)(table->data) * 1000) / HZ,
- (int __user *)oldval) ||
- (oldlenp && put_user(sizeof (int), oldlenp)))
- return -EFAULT;
- }
- if (newval && newlen) {
- int new;
-
- if (newlen != sizeof (int))
- return -EINVAL;
-
- if (get_user(new, (int __user *)newval))
- return -EFAULT;
-
- *(int *)(table->data) = (new * HZ) / 1000;
- }
- return 1;
-}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 160f62ad1cc5..3e5936a5f671 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -49,6 +49,7 @@
*/
#include <linux/types.h>
+#include <linux/random.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
@@ -74,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
* parameter 'RTO.Initial'.
*/
peer->rtt = 0;
- peer->rto = sctp_rto_initial;
+ peer->rto = msecs_to_jiffies(sctp_rto_initial);
peer->rttvar = 0;
peer->srtt = 0;
peer->rto_pending = 0;
@@ -85,7 +86,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
peer->init_sent_count = 0;
- peer->state = SCTP_ACTIVE;
peer->param_flags = SPP_HB_DISABLE |
SPP_PMTUD_ENABLE |
SPP_SACKDELAY_ENABLE;
@@ -109,6 +109,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
peer->hb_timer.function = sctp_generate_heartbeat_event;
peer->hb_timer.data = (unsigned long)peer;
+ /* Initialize the 64-bit random nonce sent with heartbeat. */
+ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
+
atomic_set(&peer->refcnt, 1);
peer->dead = 0;
@@ -517,7 +520,9 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
unsigned long sctp_transport_timeout(struct sctp_transport *t)
{
unsigned long timeout;
- timeout = t->hbinterval + t->rto + sctp_jitter(t->rto);
+ timeout = t->rto + sctp_jitter(t->rto);
+ if (t->state != SCTP_UNCONFIRMED)
+ timeout += t->hbinterval;
timeout += jiffies;
return timeout;
}
diff --git a/net/socket.c b/net/socket.c
index 565f5e8d1191..1bc4167e0da8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -42,7 +42,7 @@
* Andi Kleen : Some small cleanups, optimizations,
* and fixed a copy_from_user() bug.
* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
- * Tigran Aivazian : Made listen(2) backlog sanity checks
+ * Tigran Aivazian : Made listen(2) backlog sanity checks
* protocol-independent
*
*
@@ -53,18 +53,17 @@
*
*
* This module is effectively the top level interface to the BSD socket
- * paradigm.
+ * paradigm.
*
* Based upon Swansea University Computer Society NET3.039
*/
-#include <linux/config.h>
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/net.h>
#include <linux/interrupt.h>
+#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -97,25 +96,24 @@
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
+ size_t size, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
-static int sock_mmap(struct file *file, struct vm_area_struct * vma);
+ size_t size, loff_t pos);
+static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
struct poll_table_struct *wait);
-static long sock_ioctl(struct file *file,
- unsigned int cmd, unsigned long arg);
+static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
- unsigned int cmd, unsigned long arg);
+ unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_readv(struct file *file, const struct iovec *vector,
unsigned long count, loff_t *ppos);
static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
+ unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);
@@ -148,52 +146,8 @@ static struct file_operations socket_file_ops = {
* The protocol list. Each protocol is registered in here.
*/
-static struct net_proto_family *net_families[NPROTO];
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t net_family_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(net_family_lock);
-
-/* The strategy is: modifications net_family vector are short, do not
- sleep and veeery rare, but read access should be free of any exclusive
- locks.
- */
-
-static void net_family_write_lock(void)
-{
- spin_lock(&net_family_lock);
- while (atomic_read(&net_family_lockct) != 0) {
- spin_unlock(&net_family_lock);
-
- yield();
-
- spin_lock(&net_family_lock);
- }
-}
-
-static __inline__ void net_family_write_unlock(void)
-{
- spin_unlock(&net_family_lock);
-}
-
-static __inline__ void net_family_read_lock(void)
-{
- atomic_inc(&net_family_lockct);
- spin_unlock_wait(&net_family_lock);
-}
-
-static __inline__ void net_family_read_unlock(void)
-{
- atomic_dec(&net_family_lockct);
-}
-
-#else
-#define net_family_write_lock() do { } while(0)
-#define net_family_write_unlock() do { } while(0)
-#define net_family_read_lock() do { } while(0)
-#define net_family_read_unlock() do { } while(0)
-#endif
-
+static const struct net_proto_family *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
@@ -202,19 +156,20 @@ static __inline__ void net_family_read_unlock(void)
static DEFINE_PER_CPU(int, sockets_in_use) = 0;
/*
- * Support routines. Move socket addresses back and forth across the kernel/user
- * divide and look after the messy bits.
+ * Support routines.
+ * Move socket addresses back and forth across the kernel/user
+ * divide and look after the messy bits.
*/
-#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
+#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
16 for IP, 16 for IPX,
24 for IPv6,
- about 80 for AX.25
+ about 80 for AX.25
must be at least one bigger than
the AF_UNIX size (see net/unix/af_unix.c
- :unix_mkname()).
+ :unix_mkname()).
*/
-
+
/**
* move_addr_to_kernel - copy a socket address into kernel space
* @uaddr: Address in user space
@@ -228,11 +183,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
{
- if(ulen<0||ulen>MAX_SOCK_ADDR)
+ if (ulen < 0 || ulen > MAX_SOCK_ADDR)
return -EINVAL;
- if(ulen==0)
+ if (ulen == 0)
return 0;
- if(copy_from_user(kaddr,uaddr,ulen))
+ if (copy_from_user(kaddr, uaddr, ulen))
return -EFAULT;
return audit_sockaddr(ulen, kaddr);
}
@@ -253,51 +208,52 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
* length of the data is written over the length limit the user
* specified. Zero is returned for a success.
*/
-
-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
+
+int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+ int __user *ulen)
{
int err;
int len;
- if((err=get_user(len, ulen)))
+ err = get_user(len, ulen);
+ if (err)
return err;
- if(len>klen)
- len=klen;
- if(len<0 || len> MAX_SOCK_ADDR)
+ if (len > klen)
+ len = klen;
+ if (len < 0 || len > MAX_SOCK_ADDR)
return -EINVAL;
- if(len)
- {
+ if (len) {
if (audit_sockaddr(klen, kaddr))
return -ENOMEM;
- if(copy_to_user(uaddr,kaddr,len))
+ if (copy_to_user(uaddr, kaddr, len))
return -EFAULT;
}
/*
- * "fromlen shall refer to the value before truncation.."
- * 1003.1g
+ * "fromlen shall refer to the value before truncation.."
+ * 1003.1g
*/
return __put_user(klen, ulen);
}
#define SOCKFS_MAGIC 0x534F434B
-static kmem_cache_t * sock_inode_cachep __read_mostly;
+static kmem_cache_t *sock_inode_cachep __read_mostly;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
- ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+
+ ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
if (!ei)
return NULL;
init_waitqueue_head(&ei->socket.wait);
-
+
ei->socket.fasync_list = NULL;
ei->socket.state = SS_UNCONNECTED;
ei->socket.flags = 0;
ei->socket.ops = NULL;
ei->socket.sk = NULL;
ei->socket.file = NULL;
- ei->socket.flags = 0;
return &ei->vfs_inode;
}
@@ -308,22 +264,25 @@ static void sock_destroy_inode(struct inode *inode)
container_of(inode, struct socket_alloc, vfs_inode));
}
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
- struct socket_alloc *ei = (struct socket_alloc *) foo;
+ struct socket_alloc *ei = (struct socket_alloc *)foo;
- if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR)
+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
+ == SLAB_CTOR_CONSTRUCTOR)
inode_init_once(&ei->vfs_inode);
}
-
+
static int init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
- sizeof(struct socket_alloc),
- 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once, NULL);
+ sizeof(struct socket_alloc),
+ 0,
+ (SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_MEM_SPREAD),
+ init_once,
+ NULL);
if (sock_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -336,7 +295,8 @@ static struct super_operations sockfs_ops = {
};
static int sockfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+ int flags, const char *dev_name, void *data,
+ struct vfsmount *mnt)
{
return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
mnt);
@@ -349,12 +309,13 @@ static struct file_system_type sock_fs_type = {
.get_sb = sockfs_get_sb,
.kill_sb = kill_anon_super,
};
+
static int sockfs_delete_dentry(struct dentry *dentry)
{
return 1;
}
static struct dentry_operations sockfs_dentry_operations = {
- .d_delete = sockfs_delete_dentry,
+ .d_delete = sockfs_delete_dentry,
};
/*
@@ -478,10 +439,12 @@ struct socket *sockfd_lookup(int fd, int *err)
struct file *file;
struct socket *sock;
- if (!(file = fget(fd))) {
+ file = fget(fd);
+ if (!file) {
*err = -EBADF;
return NULL;
}
+
sock = sock_from_file(file, err);
if (!sock)
fput(file);
@@ -506,7 +469,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
/**
* sock_alloc - allocate a socket
- *
+ *
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
* NULL is returned.
@@ -514,8 +477,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
static struct socket *sock_alloc(void)
{
- struct inode * inode;
- struct socket * sock;
+ struct inode *inode;
+ struct socket *sock;
inode = new_inode(sock_mnt->mnt_sb);
if (!inode)
@@ -523,7 +486,7 @@ static struct socket *sock_alloc(void)
sock = SOCKET_I(inode);
- inode->i_mode = S_IFSOCK|S_IRWXUGO;
+ inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
@@ -537,7 +500,7 @@ static struct socket *sock_alloc(void)
* a back door. Remember to keep it shut otherwise you'll let the
* creepy crawlies in.
*/
-
+
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
return -ENXIO;
@@ -554,9 +517,9 @@ const struct file_operations bad_sock_fops = {
*
* The socket is released from the protocol stack if it has a release
* callback, and the inode is then released if the socket is bound to
- * an inode not a file.
+ * an inode not a file.
*/
-
+
void sock_release(struct socket *sock)
{
if (sock->ops) {
@@ -576,10 +539,10 @@ void sock_release(struct socket *sock)
iput(SOCK_INODE(sock));
return;
}
- sock->file=NULL;
+ sock->file = NULL;
}
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
@@ -622,14 +585,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
- msg->msg_iov = (struct iovec *)vec,
+ msg->msg_iov = (struct iovec *)vec;
msg->msg_iovlen = num;
result = sock_sendmsg(sock, msg, size);
set_fs(oldfs);
return result;
}
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
int err;
@@ -648,14 +611,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
return sock->ops->recvmsg(iocb, sock, msg, size, flags);
}
-int sock_recvmsg(struct socket *sock, struct msghdr *msg,
+int sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
- init_sync_kiocb(&iocb, NULL);
+ init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
if (-EIOCBQUEUED == ret)
@@ -663,9 +626,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
return ret;
}
-int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
- struct kvec *vec, size_t num,
- size_t size, int flags)
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size, int flags)
{
mm_segment_t oldfs = get_fs();
int result;
@@ -675,8 +637,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
- msg->msg_iov = (struct iovec *)vec,
- msg->msg_iovlen = num;
+ msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
@@ -703,7 +664,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
}
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ char __user *ubuf, size_t size,
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -721,20 +683,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
}
static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
- for (i = 0 ; i < nr_segs ; i++)
- size += iov[i].iov_len;
+ for (i = 0; i < nr_segs; i++)
+ size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
- msg->msg_iov = (struct iovec *) iov;
+ msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
@@ -749,7 +712,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
struct msghdr msg;
int ret;
- init_sync_kiocb(&iocb, NULL);
+ init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
@@ -759,7 +722,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
}
static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+ size_t count, loff_t pos)
{
struct sock_iocb siocb, *x;
@@ -772,24 +735,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
if (!x)
return -ENOMEM;
return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ &x->async_iov, 1);
}
static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
- for (i = 0 ; i < nr_segs ; i++)
- size += iov[i].iov_len;
+ for (i = 0; i < nr_segs; i++)
+ size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
- msg->msg_iov = (struct iovec *) iov;
+ msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
if (sock->type == SOCK_SEQPACKET)
@@ -816,7 +780,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov,
}
static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+ size_t count, loff_t pos)
{
struct sock_iocb siocb, *x;
@@ -830,46 +794,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
return -ENOMEM;
return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ &x->async_iov, 1);
}
-
/*
* Atomic setting of ioctl hooks to avoid race
* with module unload.
*/
static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
-void brioctl_set(int (*hook)(unsigned int, void __user *))
+void brioctl_set(int (*hook) (unsigned int, void __user *))
{
mutex_lock(&br_ioctl_mutex);
br_ioctl_hook = hook;
mutex_unlock(&br_ioctl_mutex);
}
+
EXPORT_SYMBOL(brioctl_set);
static DEFINE_MUTEX(vlan_ioctl_mutex);
-static int (*vlan_ioctl_hook)(void __user *arg);
+static int (*vlan_ioctl_hook) (void __user *arg);
-void vlan_ioctl_set(int (*hook)(void __user *))
+void vlan_ioctl_set(int (*hook) (void __user *))
{
mutex_lock(&vlan_ioctl_mutex);
vlan_ioctl_hook = hook;
mutex_unlock(&vlan_ioctl_mutex);
}
+
EXPORT_SYMBOL(vlan_ioctl_set);
static DEFINE_MUTEX(dlci_ioctl_mutex);
-static int (*dlci_ioctl_hook)(unsigned int, void __user *);
+static int (*dlci_ioctl_hook) (unsigned int, void __user *);
-void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
+void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
{
mutex_lock(&dlci_ioctl_mutex);
dlci_ioctl_hook = hook;
mutex_unlock(&dlci_ioctl_mutex);
}
+
EXPORT_SYMBOL(dlci_ioctl_set);
/*
@@ -891,8 +857,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
err = dev_ioctl(cmd, argp);
} else
-#endif /* CONFIG_WIRELESS_EXT */
- switch (cmd) {
+#endif /* CONFIG_WIRELESS_EXT */
+ switch (cmd) {
case FIOSETOWN:
case SIOCSPGRP:
err = -EFAULT;
@@ -902,7 +868,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case FIOGETOWN:
case SIOCGPGRP:
- err = put_user(sock->file->f_owner.pid, (int __user *)argp);
+ err = put_user(sock->file->f_owner.pid,
+ (int __user *)argp);
break;
case SIOCGIFBR:
case SIOCSIFBR:
@@ -913,7 +880,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
request_module("bridge");
mutex_lock(&br_ioctl_mutex);
- if (br_ioctl_hook)
+ if (br_ioctl_hook)
err = br_ioctl_hook(cmd, argp);
mutex_unlock(&br_ioctl_mutex);
break;
@@ -930,7 +897,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case SIOCGIFDIVERT:
case SIOCSIFDIVERT:
- /* Convert this to call through a hook */
+ /* Convert this to call through a hook */
err = divert_ioctl(cmd, argp);
break;
case SIOCADDDLCI:
@@ -955,7 +922,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
if (err == -ENOIOCTLCMD)
err = dev_ioctl(cmd, argp);
break;
- }
+ }
return err;
}
@@ -963,7 +930,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
struct socket *sock = NULL;
-
+
err = security_socket_create(family, type, protocol, 1);
if (err)
goto out;
@@ -974,26 +941,33 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
goto out;
}
- security_socket_post_create(sock, family, type, protocol, 1);
sock->type = type;
+ err = security_socket_post_create(sock, family, type, protocol, 1);
+ if (err)
+ goto out_release;
+
out:
*res = sock;
return err;
+out_release:
+ sock_release(sock);
+ sock = NULL;
+ goto out;
}
/* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table * wait)
+static unsigned int sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock;
/*
- * We can't return errors to poll, so it's either yes or no.
+ * We can't return errors to poll, so it's either yes or no.
*/
sock = file->private_data;
return sock->ops->poll(file, sock, wait);
}
-static int sock_mmap(struct file * file, struct vm_area_struct * vma)
+static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
struct socket *sock = file->private_data;
@@ -1003,12 +977,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma)
static int sock_close(struct inode *inode, struct file *filp)
{
/*
- * It was possible the inode is NULL we were
- * closing an unfinished socket.
+ * It was possible the inode is NULL we were
+ * closing an unfinished socket.
*/
- if (!inode)
- {
+ if (!inode) {
printk(KERN_DEBUG "sock_close: NULL inode\n");
return 0;
}
@@ -1034,57 +1007,52 @@ static int sock_close(struct inode *inode, struct file *filp)
static int sock_fasync(int fd, struct file *filp, int on)
{
- struct fasync_struct *fa, *fna=NULL, **prev;
+ struct fasync_struct *fa, *fna = NULL, **prev;
struct socket *sock;
struct sock *sk;
- if (on)
- {
+ if (on) {
fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
- if(fna==NULL)
+ if (fna == NULL)
return -ENOMEM;
}
sock = filp->private_data;
- if ((sk=sock->sk) == NULL) {
+ sk = sock->sk;
+ if (sk == NULL) {
kfree(fna);
return -EINVAL;
}
lock_sock(sk);
- prev=&(sock->fasync_list);
+ prev = &(sock->fasync_list);
- for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
- if (fa->fa_file==filp)
+ for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
+ if (fa->fa_file == filp)
break;
- if(on)
- {
- if(fa!=NULL)
- {
+ if (on) {
+ if (fa != NULL) {
write_lock_bh(&sk->sk_callback_lock);
- fa->fa_fd=fd;
+ fa->fa_fd = fd;
write_unlock_bh(&sk->sk_callback_lock);
kfree(fna);
goto out;
}
- fna->fa_file=filp;
- fna->fa_fd=fd;
- fna->magic=FASYNC_MAGIC;
- fna->fa_next=sock->fasync_list;
+ fna->fa_file = filp;
+ fna->fa_fd = fd;
+ fna->magic = FASYNC_MAGIC;
+ fna->fa_next = sock->fasync_list;
write_lock_bh(&sk->sk_callback_lock);
- sock->fasync_list=fna;
+ sock->fasync_list = fna;
write_unlock_bh(&sk->sk_callback_lock);
- }
- else
- {
- if (fa!=NULL)
- {
+ } else {
+ if (fa != NULL) {
write_lock_bh(&sk->sk_callback_lock);
- *prev=fa->fa_next;
+ *prev = fa->fa_next;
write_unlock_bh(&sk->sk_callback_lock);
kfree(fa);
}
@@ -1101,10 +1069,9 @@ int sock_wake_async(struct socket *sock, int how, int band)
{
if (!sock || !sock->fasync_list)
return -1;
- switch (how)
- {
+ switch (how) {
case 1:
-
+
if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
break;
goto call_kill;
@@ -1113,7 +1080,7 @@ int sock_wake_async(struct socket *sock, int how, int band)
break;
/* fall through */
case 0:
- call_kill:
+call_kill:
__kill_fasync(sock->fasync_list, SIGIO, band);
break;
case 3:
@@ -1122,13 +1089,15 @@ int sock_wake_async(struct socket *sock, int how, int band)
return 0;
}
-static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+static int __sock_create(int family, int type, int protocol,
+ struct socket **res, int kern)
{
int err;
struct socket *sock;
+ const struct net_proto_family *pf;
/*
- * Check protocol is in range
+ * Check protocol is in range
*/
if (family < 0 || family >= NPROTO)
return -EAFNOSUPPORT;
@@ -1141,10 +1110,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
deadlock in module load.
*/
if (family == PF_INET && type == SOCK_PACKET) {
- static int warned;
+ static int warned;
if (!warned) {
warned = 1;
- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
+ printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+ current->comm);
}
family = PF_PACKET;
}
@@ -1152,78 +1122,84 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
err = security_socket_create(family, type, protocol, kern);
if (err)
return err;
-
+
+ /*
+ * Allocate the socket and allow the family to set things up. if
+ * the protocol is 0, the family is instructed to select an appropriate
+ * default.
+ */
+ sock = sock_alloc();
+ if (!sock) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "socket: no more sockets\n");
+ return -ENFILE; /* Not exactly a match, but its the
+ closest posix thing */
+ }
+
+ sock->type = type;
+
#if defined(CONFIG_KMOD)
- /* Attempt to load a protocol module if the find failed.
- *
- * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
+ /* Attempt to load a protocol module if the find failed.
+ *
+ * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
- if (net_families[family]==NULL)
- {
- request_module("net-pf-%d",family);
- }
+ if (net_families[family] == NULL)
+ request_module("net-pf-%d", family);
#endif
- net_family_read_lock();
- if (net_families[family] == NULL) {
- err = -EAFNOSUPPORT;
- goto out;
- }
-
-/*
- * Allocate the socket and allow the family to set things up. if
- * the protocol is 0, the family is instructed to select an appropriate
- * default.
- */
-
- if (!(sock = sock_alloc())) {
- printk(KERN_WARNING "socket: no more sockets\n");
- err = -ENFILE; /* Not exactly a match, but its the
- closest posix thing */
- goto out;
- }
-
- sock->type = type;
+ rcu_read_lock();
+ pf = rcu_dereference(net_families[family]);
+ err = -EAFNOSUPPORT;
+ if (!pf)
+ goto out_release;
/*
* We will call the ->create function, that possibly is in a loadable
* module, so we have to bump that loadable module refcnt first.
*/
- err = -EAFNOSUPPORT;
- if (!try_module_get(net_families[family]->owner))
+ if (!try_module_get(pf->owner))
goto out_release;
- if ((err = net_families[family]->create(sock, protocol)) < 0) {
- sock->ops = NULL;
+ /* Now protected by module ref count */
+ rcu_read_unlock();
+
+ err = pf->create(sock, protocol);
+ if (err < 0)
goto out_module_put;
- }
/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
- if (!try_module_get(sock->ops->owner)) {
- sock->ops = NULL;
- goto out_module_put;
- }
+ if (!try_module_get(sock->ops->owner))
+ goto out_module_busy;
+
/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
- module_put(net_families[family]->owner);
+ module_put(pf->owner);
+ err = security_socket_post_create(sock, family, type, protocol, kern);
+ if (err)
+ goto out_release;
*res = sock;
- security_socket_post_create(sock, family, type, protocol, kern);
-out:
- net_family_read_unlock();
- return err;
+ return 0;
+
+out_module_busy:
+ err = -EAFNOSUPPORT;
out_module_put:
- module_put(net_families[family]->owner);
-out_release:
+ sock->ops = NULL;
+ module_put(pf->owner);
+out_sock_release:
sock_release(sock);
- goto out;
+ return err;
+
+out_release:
+ rcu_read_unlock();
+ goto out_sock_release;
}
int sock_create(int family, int type, int protocol, struct socket **res)
@@ -1262,7 +1238,8 @@ out_release:
* Create a pair of connected sockets.
*/
-asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
+asmlinkage long sys_socketpair(int family, int type, int protocol,
+ int __user *usockvec)
{
struct socket *sock1, *sock2;
int fd1, fd2, err;
@@ -1281,7 +1258,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
goto out_release_1;
err = sock1->ops->socketpair(sock1, sock2);
- if (err < 0)
+ if (err < 0)
goto out_release_both;
fd1 = fd2 = -1;
@@ -1300,7 +1277,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
* Not kernel problem.
*/
- err = put_user(fd1, &usockvec[0]);
+ err = put_user(fd1, &usockvec[0]);
if (!err)
err = put_user(fd2, &usockvec[1]);
if (!err)
@@ -1311,19 +1288,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
return err;
out_close_1:
- sock_release(sock2);
+ sock_release(sock2);
sys_close(fd1);
return err;
out_release_both:
- sock_release(sock2);
+ sock_release(sock2);
out_release_1:
- sock_release(sock1);
+ sock_release(sock1);
out:
return err;
}
-
/*
* Bind a name to a socket. Nothing much to do here since it's
* the protocol's responsibility to handle the local address.
@@ -1338,35 +1314,39 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
char address[MAX_SOCK_ADDR];
int err, fput_needed;
- if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
- {
- if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
- err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if(sock) {
+ err = move_addr_to_kernel(umyaddr, addrlen, address);
+ if (err >= 0) {
+ err = security_socket_bind(sock,
+ (struct sockaddr *)address,
+ addrlen);
if (!err)
err = sock->ops->bind(sock,
- (struct sockaddr *)address, addrlen);
+ (struct sockaddr *)
+ address, addrlen);
}
fput_light(sock->file, fput_needed);
- }
+ }
return err;
}
-
/*
* Perform a listen. Basically, we allow the protocol to do anything
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
-int sysctl_somaxconn = SOMAXCONN;
+int sysctl_somaxconn __read_mostly = SOMAXCONN;
asmlinkage long sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
-
- if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
- if ((unsigned) backlog > sysctl_somaxconn)
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock) {
+ if ((unsigned)backlog > sysctl_somaxconn)
backlog = sysctl_somaxconn;
err = security_socket_listen(sock, backlog);
@@ -1378,7 +1358,6 @@ asmlinkage long sys_listen(int fd, int backlog)
return err;
}
-
/*
* For accept, we attempt to create a new socket, set up the link
* with the client, wake up the client, then return the new
@@ -1391,7 +1370,8 @@ asmlinkage long sys_listen(int fd, int backlog)
* clean when we restucture accept also.
*/
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1403,7 +1383,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
goto out;
err = -ENFILE;
- if (!(newsock = sock_alloc()))
+ if (!(newsock = sock_alloc()))
goto out_put;
newsock->type = sock->type;
@@ -1435,11 +1415,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
goto out_fd;
if (upeer_sockaddr) {
- if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
+ if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+ &len, 2) < 0) {
err = -ECONNABORTED;
goto out_fd;
}
- err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+ err = move_addr_to_user(address, len, upeer_sockaddr,
+ upeer_addrlen);
if (err < 0)
goto out_fd;
}
@@ -1461,7 +1443,6 @@ out_fd:
goto out_put;
}
-
/*
* Attempt to connect to a socket with the server address. The address
* is in user space so we verify it is OK and move it to kernel space.
@@ -1474,7 +1455,8 @@ out_fd:
* include the -EINPROGRESS status for such sockets.
*/
-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+ int addrlen)
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
@@ -1487,11 +1469,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl
if (err < 0)
goto out_put;
- err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+ err =
+ security_socket_connect(sock, (struct sockaddr *)address, addrlen);
if (err)
goto out_put;
- err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+ err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
sock->file->f_flags);
out_put:
fput_light(sock->file, fput_needed);
@@ -1504,12 +1487,13 @@ out:
* name to user space.
*/
-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len)
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
int len, err, fput_needed;
-
+
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -1534,22 +1518,27 @@ out:
* name to user space.
*/
-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len)
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
int len, err, fput_needed;
- if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock != NULL) {
err = security_socket_getpeername(sock);
if (err) {
fput_light(sock->file, fput_needed);
return err;
}
- err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
+ err =
+ sock->ops->getname(sock, (struct sockaddr *)address, &len,
+ 1);
if (!err)
- err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
+ err = move_addr_to_user(address, len, usockaddr,
+ usockaddr_len);
fput_light(sock->file, fput_needed);
}
return err;
@@ -1561,8 +1550,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _
* the protocol.
*/
-asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
- struct sockaddr __user *addr, int addr_len)
+asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
+ unsigned flags, struct sockaddr __user *addr,
+ int addr_len)
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
@@ -1579,54 +1569,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag
sock = sock_from_file(sock_file, &err);
if (!sock)
goto out_put;
- iov.iov_base=buff;
- iov.iov_len=len;
- msg.msg_name=NULL;
- msg.msg_iov=&iov;
- msg.msg_iovlen=1;
- msg.msg_control=NULL;
- msg.msg_controllen=0;
- msg.msg_namelen=0;
+ iov.iov_base = buff;
+ iov.iov_len = len;
+ msg.msg_name = NULL;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
if (addr) {
err = move_addr_to_kernel(addr, addr_len, address);
if (err < 0)
goto out_put;
- msg.msg_name=address;
- msg.msg_namelen=addr_len;
+ msg.msg_name = address;
+ msg.msg_namelen = addr_len;
}
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
err = sock_sendmsg(sock, &msg, len);
-out_put:
+out_put:
fput_light(sock_file, fput_needed);
return err;
}
/*
- * Send a datagram down a socket.
+ * Send a datagram down a socket.
*/
-asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
+asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
{
return sys_sendto(fd, buff, len, flags, NULL, 0);
}
/*
- * Receive a frame from the socket and optionally record the address of the
+ * Receive a frame from the socket and optionally record the address of the
* sender. We verify the buffers are writable and if needed move the
* sender address from kernel to user space.
*/
-asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
- struct sockaddr __user *addr, int __user *addr_len)
+asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
+ unsigned flags, struct sockaddr __user *addr,
+ int __user *addr_len)
{
struct socket *sock;
struct iovec iov;
struct msghdr msg;
char address[MAX_SOCK_ADDR];
- int err,err2;
+ int err, err2;
struct file *sock_file;
int fput_needed;
@@ -1638,23 +1629,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f
if (!sock)
goto out;
- msg.msg_control=NULL;
- msg.msg_controllen=0;
- msg.msg_iovlen=1;
- msg.msg_iov=&iov;
- iov.iov_len=size;
- iov.iov_base=ubuf;
- msg.msg_name=address;
- msg.msg_namelen=MAX_SOCK_ADDR;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_iovlen = 1;
+ msg.msg_iov = &iov;
+ iov.iov_len = size;
+ iov.iov_base = ubuf;
+ msg.msg_name = address;
+ msg.msg_namelen = MAX_SOCK_ADDR;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
- err=sock_recvmsg(sock, &msg, size, flags);
+ err = sock_recvmsg(sock, &msg, size, flags);
- if(err >= 0 && addr != NULL)
- {
- err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
- if(err2<0)
- err=err2;
+ if (err >= 0 && addr != NULL) {
+ err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+ if (err2 < 0)
+ err = err2;
}
out:
fput_light(sock_file, fput_needed);
@@ -1662,10 +1652,11 @@ out:
}
/*
- * Receive a datagram from a socket.
+ * Receive a datagram from a socket.
*/
-asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
+asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
+ unsigned flags)
{
return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
@@ -1675,24 +1666,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags
* to pass the user mode parameter for the protocols to sort out.
*/
-asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
+asmlinkage long sys_setsockopt(int fd, int level, int optname,
+ char __user *optval, int optlen)
{
int err, fput_needed;
struct socket *sock;
if (optlen < 0)
return -EINVAL;
-
- if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL)
- {
- err = security_socket_setsockopt(sock,level,optname);
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock != NULL) {
+ err = security_socket_setsockopt(sock, level, optname);
if (err)
goto out_put;
if (level == SOL_SOCKET)
- err=sock_setsockopt(sock,level,optname,optval,optlen);
+ err =
+ sock_setsockopt(sock, level, optname, optval,
+ optlen);
else
- err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+ err =
+ sock->ops->setsockopt(sock, level, optname, optval,
+ optlen);
out_put:
fput_light(sock->file, fput_needed);
}
@@ -1704,27 +1700,32 @@ out_put:
* to pass a user mode parameter for the protocols to sort out.
*/
-asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
+asmlinkage long sys_getsockopt(int fd, int level, int optname,
+ char __user *optval, int __user *optlen)
{
int err, fput_needed;
struct socket *sock;
- if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock != NULL) {
err = security_socket_getsockopt(sock, level, optname);
if (err)
goto out_put;
if (level == SOL_SOCKET)
- err=sock_getsockopt(sock,level,optname,optval,optlen);
+ err =
+ sock_getsockopt(sock, level, optname, optval,
+ optlen);
else
- err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+ err =
+ sock->ops->getsockopt(sock, level, optname, optval,
+ optlen);
out_put:
fput_light(sock->file, fput_needed);
}
return err;
}
-
/*
* Shutdown a socket.
*/
@@ -1734,8 +1735,8 @@ asmlinkage long sys_shutdown(int fd, int how)
int err, fput_needed;
struct socket *sock;
- if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
- {
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock != NULL) {
err = security_socket_shutdown(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
@@ -1744,41 +1745,42 @@ asmlinkage long sys_shutdown(int fd, int how)
return err;
}
-/* A couple of helpful macros for getting the address of the 32/64 bit
+/* A couple of helpful macros for getting the address of the 32/64 bit
* fields which are the same type (int / unsigned) on our platforms.
*/
#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
-
/*
* BSD sendmsg interface
*/
asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
- struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+ struct compat_msghdr __user *msg_compat =
+ (struct compat_msghdr __user *)msg;
struct socket *sock;
char address[MAX_SOCK_ADDR];
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
unsigned char ctl[sizeof(struct cmsghdr) + 20]
- __attribute__ ((aligned (sizeof(__kernel_size_t))));
- /* 20 is size of ipv6_pktinfo */
+ __attribute__ ((aligned(sizeof(__kernel_size_t))));
+ /* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl;
struct msghdr msg_sys;
int err, ctl_len, iov_size, total_len;
int fput_needed;
-
+
err = -EFAULT;
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(&msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+ }
+ else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
+ if (!sock)
goto out;
/* do not move before msg_sys is valid */
@@ -1786,7 +1788,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
if (msg_sys.msg_iovlen > UIO_MAXIOV)
goto out_put;
- /* Check whether to allocate the iovec area*/
+ /* Check whether to allocate the iovec area */
err = -ENOMEM;
iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1800,7 +1802,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
} else
err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
- if (err < 0)
+ if (err < 0)
goto out_freeiov;
total_len = err;
@@ -1808,18 +1810,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
if (msg_sys.msg_controllen > INT_MAX)
goto out_freeiov;
- ctl_len = msg_sys.msg_controllen;
+ ctl_len = msg_sys.msg_controllen;
if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
- err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl));
+ err =
+ cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+ sizeof(ctl));
if (err)
goto out_freeiov;
ctl_buf = msg_sys.msg_control;
ctl_len = msg_sys.msg_controllen;
} else if (ctl_len) {
- if (ctl_len > sizeof(ctl))
- {
+ if (ctl_len > sizeof(ctl)) {
ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
- if (ctl_buf == NULL)
+ if (ctl_buf == NULL)
goto out_freeiov;
}
err = -EFAULT;
@@ -1828,7 +1831,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
* Afterwards, it will be a kernel pointer. Thus the compiler-assisted
* checking falls down on this.
*/
- if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
+ if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
+ ctl_len))
goto out_freectl;
msg_sys.msg_control = ctl_buf;
}
@@ -1839,14 +1843,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
err = sock_sendmsg(sock, &msg_sys, total_len);
out_freectl:
- if (ctl_buf != ctl)
+ if (ctl_buf != ctl)
sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out_freeiov:
if (iov != iovstack)
sock_kfree_s(sock->sk, iov, iov_size);
out_put:
fput_light(sock->file, fput_needed);
-out:
+out:
return err;
}
@@ -1854,12 +1858,14 @@ out:
* BSD recvmsg interface
*/
-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
+asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
+ unsigned int flags)
{
- struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+ struct compat_msghdr __user *msg_compat =
+ (struct compat_msghdr __user *)msg;
struct socket *sock;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack;
+ struct iovec *iov = iovstack;
struct msghdr msg_sys;
unsigned long cmsg_ptr;
int err, iov_size, total_len, len;
@@ -1871,13 +1877,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
/* user mode address pointers */
struct sockaddr __user *uaddr;
int __user *uaddr_len;
-
+
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(&msg_sys, msg_compat))
return -EFAULT;
- } else
- if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
- return -EFAULT;
+ }
+ else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+ return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
@@ -1886,8 +1892,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
err = -EMSGSIZE;
if (msg_sys.msg_iovlen > UIO_MAXIOV)
goto out_put;
-
- /* Check whether to allocate the iovec area*/
+
+ /* Check whether to allocate the iovec area */
err = -ENOMEM;
iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1897,11 +1903,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
}
/*
- * Save the user-mode address (verify_iovec will change the
- * kernel msghdr to use the kernel address space)
+ * Save the user-mode address (verify_iovec will change the
+ * kernel msghdr to use the kernel address space)
*/
-
- uaddr = (void __user *) msg_sys.msg_name;
+
+ uaddr = (void __user *)msg_sys.msg_name;
uaddr_len = COMPAT_NAMELEN(msg);
if (MSG_CMSG_COMPAT & flags) {
err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
@@ -1909,13 +1915,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
if (err < 0)
goto out_freeiov;
- total_len=err;
+ total_len = err;
cmsg_ptr = (unsigned long)msg_sys.msg_control;
msg_sys.msg_flags = 0;
if (MSG_CMSG_COMPAT & flags)
msg_sys.msg_flags = MSG_CMSG_COMPAT;
-
+
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1924,7 +1930,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
len = err;
if (uaddr != NULL) {
- err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+ err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+ uaddr_len);
if (err < 0)
goto out_freeiov;
}
@@ -1933,10 +1940,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
if (err)
goto out_freeiov;
if (MSG_CMSG_COMPAT & flags)
- err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
+ err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
&msg_compat->msg_controllen);
else
- err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
+ err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
&msg->msg_controllen);
if (err)
goto out_freeiov;
@@ -1955,163 +1962,187 @@ out:
/* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long))
-static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+static const unsigned char nargs[18]={
+ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+ AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+ AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+};
+
#undef AL
/*
- * System call vectors.
+ * System call vectors.
*
* Argument checking cleaned up. Saved 20% in size.
* This function doesn't need to set the kernel lock because
- * it is set by the callees.
+ * it is set by the callees.
*/
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
unsigned long a[6];
- unsigned long a0,a1;
+ unsigned long a0, a1;
int err;
- if(call<1||call>SYS_RECVMSG)
+ if (call < 1 || call > SYS_RECVMSG)
return -EINVAL;
/* copy_from_user should be SMP safe. */
if (copy_from_user(a, args, nargs[call]))
return -EFAULT;
- err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
+ err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
if (err)
return err;
- a0=a[0];
- a1=a[1];
-
- switch(call)
- {
- case SYS_SOCKET:
- err = sys_socket(a0,a1,a[2]);
- break;
- case SYS_BIND:
- err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
- break;
- case SYS_CONNECT:
- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
- break;
- case SYS_LISTEN:
- err = sys_listen(a0,a1);
- break;
- case SYS_ACCEPT:
- err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
- break;
- case SYS_GETSOCKNAME:
- err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
- break;
- case SYS_GETPEERNAME:
- err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
- break;
- case SYS_SOCKETPAIR:
- err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
- break;
- case SYS_SEND:
- err = sys_send(a0, (void __user *)a1, a[2], a[3]);
- break;
- case SYS_SENDTO:
- err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
- (struct sockaddr __user *)a[4], a[5]);
- break;
- case SYS_RECV:
- err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
- break;
- case SYS_RECVFROM:
- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
- (struct sockaddr __user *)a[4], (int __user *)a[5]);
- break;
- case SYS_SHUTDOWN:
- err = sys_shutdown(a0,a1);
- break;
- case SYS_SETSOCKOPT:
- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
- break;
- case SYS_GETSOCKOPT:
- err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
- break;
- case SYS_SENDMSG:
- err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
- break;
- case SYS_RECVMSG:
- err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
- break;
- default:
- err = -EINVAL;
- break;
+ a0 = a[0];
+ a1 = a[1];
+
+ switch (call) {
+ case SYS_SOCKET:
+ err = sys_socket(a0, a1, a[2]);
+ break;
+ case SYS_BIND:
+ err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+ break;
+ case SYS_CONNECT:
+ err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+ break;
+ case SYS_LISTEN:
+ err = sys_listen(a0, a1);
+ break;
+ case SYS_ACCEPT:
+ err =
+ sys_accept(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_GETSOCKNAME:
+ err =
+ sys_getsockname(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_GETPEERNAME:
+ err =
+ sys_getpeername(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_SOCKETPAIR:
+ err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+ break;
+ case SYS_SEND:
+ err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+ break;
+ case SYS_SENDTO:
+ err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4], a[5]);
+ break;
+ case SYS_RECV:
+ err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+ break;
+ case SYS_RECVFROM:
+ err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4],
+ (int __user *)a[5]);
+ break;
+ case SYS_SHUTDOWN:
+ err = sys_shutdown(a0, a1);
+ break;
+ case SYS_SETSOCKOPT:
+ err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+ break;
+ case SYS_GETSOCKOPT:
+ err =
+ sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+ (int __user *)a[4]);
+ break;
+ case SYS_SENDMSG:
+ err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+ break;
+ case SYS_RECVMSG:
+ err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+ break;
+ default:
+ err = -EINVAL;
+ break;
}
return err;
}
-#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+#endif /* __ARCH_WANT_SYS_SOCKETCALL */
-/*
+/**
+ * sock_register - add a socket protocol handler
+ * @ops: description of protocol
+ *
* This function is called by a protocol handler that wants to
* advertise its address family, and have it linked into the
- * SOCKET module.
+ * socket interface. The value ops->family coresponds to the
+ * socket system call protocol family.
*/
-
-int sock_register(struct net_proto_family *ops)
+int sock_register(const struct net_proto_family *ops)
{
int err;
if (ops->family >= NPROTO) {
- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
+ printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+ NPROTO);
return -ENOBUFS;
}
- net_family_write_lock();
- err = -EEXIST;
- if (net_families[ops->family] == NULL) {
- net_families[ops->family]=ops;
+
+ spin_lock(&net_family_lock);
+ if (net_families[ops->family])
+ err = -EEXIST;
+ else {
+ net_families[ops->family] = ops;
err = 0;
}
- net_family_write_unlock();
- printk(KERN_INFO "NET: Registered protocol family %d\n",
- ops->family);
+ spin_unlock(&net_family_lock);
+
+ printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
return err;
}
-/*
+/**
+ * sock_unregister - remove a protocol handler
+ * @family: protocol family to remove
+ *
* This function is called by a protocol handler that wants to
* remove its address family, and have it unlinked from the
- * SOCKET module.
+ * new socket creation.
+ *
+ * If protocol handler is a module, then it can use module reference
+ * counts to protect against new references. If protocol handler is not
+ * a module then it needs to provide its own protection in
+ * the ops->create routine.
*/
-
-int sock_unregister(int family)
+void sock_unregister(int family)
{
- if (family < 0 || family >= NPROTO)
- return -1;
+ BUG_ON(family < 0 || family >= NPROTO);
- net_family_write_lock();
- net_families[family]=NULL;
- net_family_write_unlock();
- printk(KERN_INFO "NET: Unregistered protocol family %d\n",
- family);
- return 0;
+ spin_lock(&net_family_lock);
+ net_families[family] = NULL;
+ spin_unlock(&net_family_lock);
+
+ synchronize_rcu();
+
+ printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
}
static int __init sock_init(void)
{
/*
- * Initialize sock SLAB cache.
+ * Initialize sock SLAB cache.
*/
-
+
sk_init();
/*
- * Initialize skbuff SLAB cache
+ * Initialize skbuff SLAB cache
*/
skb_init();
/*
- * Initialize the protocols module.
+ * Initialize the protocols module.
*/
init_inodecache();
@@ -2137,7 +2168,7 @@ void socket_seq_show(struct seq_file *seq)
int counter = 0;
for_each_possible_cpu(cpu)
- counter += per_cpu(sockets_in_use, cpu);
+ counter += per_cpu(sockets_in_use, cpu);
/* It can be negative, by the way. 8) */
if (counter < 0)
@@ -2145,11 +2176,11 @@ void socket_seq_show(struct seq_file *seq)
seq_printf(seq, "sockets: used %d\n", counter);
}
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file, unsigned cmd,
- unsigned long arg)
+ unsigned long arg)
{
struct socket *sock = file->private_data;
int ret = -ENOIOCTLCMD;
@@ -2161,6 +2192,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd,
}
#endif
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+ return sock->ops->bind(sock, addr, addrlen);
+}
+
+int kernel_listen(struct socket *sock, int backlog)
+{
+ return sock->ops->listen(sock, backlog);
+}
+
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+{
+ struct sock *sk = sock->sk;
+ int err;
+
+ err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+ newsock);
+ if (err < 0)
+ goto done;
+
+ err = sock->ops->accept(sock, *newsock, flags);
+ if (err < 0) {
+ sock_release(*newsock);
+ goto done;
+ }
+
+ (*newsock)->ops = sock->ops;
+
+done:
+ return err;
+}
+
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+ int flags)
+{
+ return sock->ops->connect(sock, addr, addrlen, flags);
+}
+
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+ int *addrlen)
+{
+ return sock->ops->getname(sock, addr, addrlen, 0);
+}
+
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+ int *addrlen)
+{
+ return sock->ops->getname(sock, addr, addrlen, 1);
+}
+
+int kernel_getsockopt(struct socket *sock, int level, int optname,
+ char *optval, int *optlen)
+{
+ mm_segment_t oldfs = get_fs();
+ int err;
+
+ set_fs(KERNEL_DS);
+ if (level == SOL_SOCKET)
+ err = sock_getsockopt(sock, level, optname, optval, optlen);
+ else
+ err = sock->ops->getsockopt(sock, level, optname, optval,
+ optlen);
+ set_fs(oldfs);
+ return err;
+}
+
+int kernel_setsockopt(struct socket *sock, int level, int optname,
+ char *optval, int optlen)
+{
+ mm_segment_t oldfs = get_fs();
+ int err;
+
+ set_fs(KERNEL_DS);
+ if (level == SOL_SOCKET)
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
+ else
+ err = sock->ops->setsockopt(sock, level, optname, optval,
+ optlen);
+ set_fs(oldfs);
+ return err;
+}
+
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+ size_t size, int flags)
+{
+ if (sock->ops->sendpage)
+ return sock->ops->sendpage(sock, page, offset, size, flags);
+
+ return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+{
+ mm_segment_t oldfs = get_fs();
+ int err;
+
+ set_fs(KERNEL_DS);
+ err = sock->ops->ioctl(sock, cmd, arg);
+ set_fs(oldfs);
+
+ return err;
+}
+
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);
@@ -2177,3 +2311,13 @@ EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
+EXPORT_SYMBOL(kernel_bind);
+EXPORT_SYMBOL(kernel_listen);
+EXPORT_SYMBOL(kernel_accept);
+EXPORT_SYMBOL(kernel_connect);
+EXPORT_SYMBOL(kernel_getsockname);
+EXPORT_SYMBOL(kernel_getpeername);
+EXPORT_SYMBOL(kernel_getsockopt);
+EXPORT_SYMBOL(kernel_setsockopt);
+EXPORT_SYMBOL(kernel_sendpage);
+EXPORT_SYMBOL(kernel_sock_ioctl);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 55163af3dcaf..993ff1a5d945 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -331,8 +331,8 @@ rpcauth_unbindcred(struct rpc_task *task)
task->tk_msg.rpc_cred = NULL;
}
-u32 *
-rpcauth_marshcred(struct rpc_task *task, u32 *p)
+__be32 *
+rpcauth_marshcred(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
@@ -342,8 +342,8 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p)
return cred->cr_ops->crmarshal(task, p);
}
-u32 *
-rpcauth_checkverf(struct rpc_task *task, u32 *p)
+__be32 *
+rpcauth_checkverf(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
@@ -355,7 +355,7 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p)
int
rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
- u32 *data, void *obj)
+ __be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
@@ -369,7 +369,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
int
rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
- u32 *data, void *obj)
+ __be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 519ebc17c028..a6ed2d22a6e6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -88,7 +88,6 @@ struct gss_auth {
struct list_head upcalls;
struct rpc_clnt *client;
struct dentry *dentry;
- char path[48];
spinlock_t lock;
};
@@ -225,9 +224,8 @@ gss_alloc_context(void)
{
struct gss_cl_ctx *ctx;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
- memset(ctx, 0, sizeof(*ctx));
ctx->gc_proc = RPC_GSS_PROC_DATA;
ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */
spin_lock_init(&ctx->gc_seq_lock);
@@ -391,9 +389,8 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
{
struct gss_upcall_msg *gss_msg;
- gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL);
+ gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
if (gss_msg != NULL) {
- memset(gss_msg, 0, sizeof(*gss_msg));
INIT_LIST_HEAD(&gss_msg->list);
rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
init_waitqueue_head(&gss_msg->waitqueue);
@@ -692,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
if (err)
goto err_put_mech;
- snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
- clnt->cl_pathname,
- gss_auth->mech->gm_name);
- gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+ clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
if (IS_ERR(gss_auth->dentry)) {
err = PTR_ERR(gss_auth->dentry);
goto err_put_mech;
@@ -720,8 +715,7 @@ gss_destroy(struct rpc_auth *auth)
auth, auth->au_flavor);
gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- rpc_unlink(gss_auth->path);
- dput(gss_auth->dentry);
+ rpc_unlink(gss_auth->dentry);
gss_auth->dentry = NULL;
gss_mech_put(gss_auth->mech);
@@ -776,10 +770,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
acred->uid, auth->au_flavor);
- if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+ if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
goto out_err;
- memset(cred, 0, sizeof(*cred));
atomic_set(&cred->gc_count, 1);
cred->gc_uid = acred->uid;
/*
@@ -833,14 +826,14 @@ out:
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
-static u32 *
-gss_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+gss_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- u32 *cred_len;
+ __be32 *cred_len;
struct rpc_rqst *req = task->tk_rqstp;
u32 maj_stat = 0;
struct xdr_netobj mic;
@@ -901,12 +894,12 @@ gss_refresh(struct rpc_task *task)
return 0;
}
-static u32 *
-gss_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+gss_validate(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- u32 seq;
+ __be32 seq;
struct kvec iov;
struct xdr_buf verf_buf;
struct xdr_netobj mic;
@@ -947,13 +940,14 @@ out_bad:
static inline int
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+ kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf integ_buf;
- u32 *integ_len = NULL;
+ __be32 *integ_len = NULL;
struct xdr_netobj mic;
- u32 offset, *q;
+ u32 offset;
+ __be32 *q;
struct kvec *iov;
u32 maj_stat = 0;
int status = -EIO;
@@ -1039,13 +1033,13 @@ out:
static inline int
gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+ kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
u32 offset;
u32 maj_stat;
int status;
- u32 *opaque_len;
+ __be32 *opaque_len;
struct page **inpages;
int first;
int pad;
@@ -1102,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
static int
gss_wrap_req(struct rpc_task *task,
- kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
+ kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1139,7 +1133,7 @@ out:
static inline int
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, u32 **p)
+ struct rpc_rqst *rqstp, __be32 **p)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct xdr_buf integ_buf;
@@ -1176,7 +1170,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
static inline int
gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, u32 **p)
+ struct rpc_rqst *rqstp, __be32 **p)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
u32 offset;
@@ -1205,13 +1199,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
static int
gss_unwrap_resp(struct rpc_task *task,
- kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
+ kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- u32 *savedp = p;
+ __be32 *savedp = p;
struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
int savedlen = head->iov_len;
int status = -EIO;
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 76b969e6904f..e11a40b25cce 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
@@ -49,7 +50,7 @@
u32
krb5_encrypt(
- struct crypto_tfm *tfm,
+ struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
@@ -58,26 +59,27 @@ krb5_encrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[16] = {0};
+ struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
dprintk("RPC: krb5_encrypt: input data:\n");
print_hexl((u32 *)in, length, 0);
- if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+ if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_tfm_alg_ivsize(tfm) > 16) {
+ if (crypto_blkcipher_ivsize(tfm) > 16) {
dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n",
- crypto_tfm_alg_ivsize(tfm));
+ crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm));
+ memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_set_buf(sg, out, length);
- ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
+ ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
dprintk("RPC: krb5_encrypt: output data:\n");
print_hexl((u32 *)out, length, 0);
@@ -90,7 +92,7 @@ EXPORT_SYMBOL(krb5_encrypt);
u32
krb5_decrypt(
- struct crypto_tfm *tfm,
+ struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
@@ -99,25 +101,26 @@ krb5_decrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[16] = {0};
+ struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
dprintk("RPC: krb5_decrypt: input data:\n");
print_hexl((u32 *)in, length, 0);
- if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+ if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_tfm_alg_ivsize(tfm) > 16) {
+ if (crypto_blkcipher_ivsize(tfm) > 16) {
dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n",
- crypto_tfm_alg_ivsize(tfm));
+ crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm));
+ memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_set_buf(sg, out, length);
- ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
+ ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
dprintk("RPC: krb5_decrypt: output_data:\n");
print_hexl((u32 *)out, length, 0);
@@ -197,11 +200,9 @@ out:
static int
checksummer(struct scatterlist *sg, void *data)
{
- struct crypto_tfm *tfm = (struct crypto_tfm *)data;
+ struct hash_desc *desc = data;
- crypto_digest_update(tfm, sg, 1);
-
- return 0;
+ return crypto_hash_update(desc, sg, sg->length);
}
/* checksum the plaintext data and hdrlen bytes of the token header */
@@ -210,8 +211,9 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
int body_offset, struct xdr_netobj *cksum)
{
char *cksumname;
- struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */
+ struct hash_desc desc; /* XXX add to ctx? */
struct scatterlist sg[1];
+ int err;
switch (cksumtype) {
case CKSUMTYPE_RSA_MD5:
@@ -222,25 +224,35 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
" unsupported checksum %d", cksumtype);
return GSS_S_FAILURE;
}
- if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP)))
+ desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
- cksum->len = crypto_tfm_alg_digestsize(tfm);
+ cksum->len = crypto_hash_digestsize(desc.tfm);
+ desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- crypto_digest_init(tfm);
+ err = crypto_hash_init(&desc);
+ if (err)
+ goto out;
sg_set_buf(sg, header, hdrlen);
- crypto_digest_update(tfm, sg, 1);
- process_xdr_buf(body, body_offset, body->len - body_offset,
- checksummer, tfm);
- crypto_digest_final(tfm, cksum->data);
- crypto_free_tfm(tfm);
- return 0;
+ err = crypto_hash_update(&desc, sg, hdrlen);
+ if (err)
+ goto out;
+ err = process_xdr_buf(body, body_offset, body->len - body_offset,
+ checksummer, &desc);
+ if (err)
+ goto out;
+ err = crypto_hash_final(&desc, cksum->data);
+
+out:
+ crypto_free_hash(desc.tfm);
+ return err ? GSS_S_FAILURE : 0;
}
EXPORT_SYMBOL(make_checksum);
struct encryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
- struct crypto_tfm *tfm;
+ struct blkcipher_desc desc;
int pos;
struct xdr_buf *outbuf;
struct page **pages;
@@ -285,8 +297,8 @@ encryptor(struct scatterlist *sg, void *data)
if (thislen == 0)
return 0;
- ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
- thislen, desc->iv);
+ ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
+ desc->infrags, thislen);
if (ret)
return ret;
if (fraglen) {
@@ -305,16 +317,18 @@ encryptor(struct scatterlist *sg, void *data)
}
int
-gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
- struct page **pages)
+gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+ int offset, struct page **pages)
{
int ret;
struct encryptor_desc desc;
- BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+ BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
- desc.tfm = tfm;
+ desc.desc.tfm = tfm;
+ desc.desc.info = desc.iv;
+ desc.desc.flags = 0;
desc.pos = offset;
desc.outbuf = buf;
desc.pages = pages;
@@ -329,7 +343,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf);
struct decryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
- struct crypto_tfm *tfm;
+ struct blkcipher_desc desc;
struct scatterlist frags[4];
int fragno;
int fraglen;
@@ -355,8 +369,8 @@ decryptor(struct scatterlist *sg, void *data)
if (thislen == 0)
return 0;
- ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
- thislen, desc->iv);
+ ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
+ desc->frags, thislen);
if (ret)
return ret;
if (fraglen) {
@@ -373,15 +387,18 @@ decryptor(struct scatterlist *sg, void *data)
}
int
-gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
+gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+ int offset)
{
struct decryptor_desc desc;
/* XXXJBF: */
- BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+ BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
- desc.tfm = tfm;
+ desc.desc.tfm = tfm;
+ desc.desc.info = desc.iv;
+ desc.desc.flags = 0;
desc.fragno = 0;
desc.fraglen = 0;
return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 129e2bd36aff..325e72e4fd31 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,7 @@
*
*/
+#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
}
static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res)
{
struct xdr_netobj key;
- int alg, alg_mode;
+ int alg;
char *alg_name;
p = simple_get_bytes(p, end, &alg, sizeof(alg));
@@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
switch (alg) {
case ENCTYPE_DES_CBC_RAW:
- alg_name = "des";
- alg_mode = CRYPTO_TFM_MODE_CBC;
+ alg_name = "cbc(des)";
break;
default:
printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
goto out_err_free_key;
}
- if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+ *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(*res)) {
printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
+ *res = NULL;
goto out_err_free_key;
}
- if (crypto_cipher_setkey(*res, key.data, key.len)) {
+ if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
goto out_err_free_tfm;
}
@@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
return p;
out_err_free_tfm:
- crypto_free_tfm(*res);
+ crypto_free_blkcipher(*res);
out_err_free_key:
kfree(key.data);
p = ERR_PTR(-EINVAL);
@@ -129,9 +131,8 @@ gss_import_sec_context_kerberos(const void *p,
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
- if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+ if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
goto out_err;
- memset(ctx, 0, sizeof(*ctx));
p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
if (IS_ERR(p))
@@ -169,13 +170,13 @@ gss_import_sec_context_kerberos(const void *p,
}
ctx_id->internal_ctx_id = ctx;
- dprintk("RPC: Succesfully imported new context.\n");
+ dprintk("RPC: Successfully imported new context.\n");
return 0;
out_err_free_key2:
- crypto_free_tfm(ctx->seq);
+ crypto_free_blkcipher(ctx->seq);
out_err_free_key1:
- crypto_free_tfm(ctx->enc);
+ crypto_free_blkcipher(ctx->enc);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err_free_ctx:
@@ -188,8 +189,8 @@ static void
gss_delete_sec_context_kerberos(void *internal_ctx) {
struct krb5_ctx *kctx = internal_ctx;
- crypto_free_tfm(kctx->seq);
- crypto_free_tfm(kctx->enc);
+ crypto_free_blkcipher(kctx->seq);
+ crypto_free_blkcipher(kctx->enc);
kfree(kctx->mech_used.data);
kfree(kctx);
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index f43311221a72..08601ee4cd73 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,7 +70,7 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(krb5_seq_lock);
u32
gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
@@ -115,7 +115,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
- *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
+ *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg);
memset(krb5_hdr + 4, 0xff, 4);
if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c53ead39118d..c604baf3a5f6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -41,7 +41,7 @@
#endif
s32
-krb5_make_seq_num(struct crypto_tfm *key,
+krb5_make_seq_num(struct crypto_blkcipher *key,
int direction,
s32 seqnum,
unsigned char *cksum, unsigned char *buf)
@@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key,
}
s32
-krb5_get_seq_num(struct crypto_tfm *key,
+krb5_get_seq_num(struct crypto_blkcipher *key,
unsigned char *cksum,
unsigned char *buf,
int *direction, s32 * seqnum)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 89d1f3e14128..cc45c1605f80 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
goto out_err;
}
- blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+ blocksize = crypto_blkcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
plainlen = blocksize + buf->len - offset;
@@ -177,9 +177,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
msg_start = krb5_hdr + 24;
/* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
- *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+ *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg);
memset(krb5_hdr + 4, 0xff, 4);
- *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+ *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
make_confounder(msg_start, blocksize);
@@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
- blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+ blocksize = crypto_blkcipher_blocksize(kctx->enc);
data_start = ptr + 22 + blocksize;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index f8bac6ccd524..3db745379d06 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -224,7 +224,8 @@ EXPORT_SYMBOL(gss_service_to_auth_domain_name);
void
gss_mech_put(struct gss_api_mech * gm)
{
- module_put(gm->gm_owner);
+ if (gm)
+ module_put(gm->gm_owner);
}
EXPORT_SYMBOL(gss_mech_put);
@@ -236,9 +237,8 @@ gss_import_sec_context(const void *input_token, size_t bufsize,
struct gss_api_mech *mech,
struct gss_ctx **ctx_id)
{
- if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL)))
+ if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
return GSS_S_FAILURE;
- memset(*ctx_id, 0, sizeof(**ctx_id));
(*ctx_id)->mech_type = gss_mech_get(mech);
return mech->gm_ops
@@ -307,8 +307,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
(*context_handle)->mech_type->gm_ops
->gss_delete_sec_context((*context_handle)
->internal_ctx_id);
- if ((*context_handle)->mech_type)
- gss_mech_put((*context_handle)->mech_type);
+ gss_mech_put((*context_handle)->mech_type);
kfree(*context_handle);
*context_handle=NULL;
return GSS_S_COMPLETE;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 5bf11ccba7cd..bdedf456bc17 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -34,6 +34,7 @@
*
*/
+#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
}
static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res,
+ int *resalg)
{
struct xdr_netobj key = { 0 };
- int alg_mode,setkey = 0;
+ int setkey = 0;
char *alg_name;
p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
@@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
switch (*resalg) {
case NID_des_cbc:
- alg_name = "des";
- alg_mode = CRYPTO_TFM_MODE_CBC;
+ alg_name = "cbc(des)";
setkey = 1;
break;
case NID_cast5_cbc:
/* XXXX here in name only, not used */
- alg_name = "cast5";
- alg_mode = CRYPTO_TFM_MODE_CBC;
+ alg_name = "cbc(cast5)";
setkey = 0; /* XXX will need to set to 1 */
break;
case NID_md5:
@@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
}
alg_name = "md5";
- alg_mode = 0;
setkey = 0;
break;
default:
dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
goto out_err_free_key;
}
- if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+ *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(*res)) {
printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
+ *res = NULL;
goto out_err_free_key;
}
if (setkey) {
- if (crypto_cipher_setkey(*res, key.data, key.len)) {
+ if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
goto out_err_free_tfm;
}
@@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
return p;
out_err_free_tfm:
- crypto_free_tfm(*res);
+ crypto_free_blkcipher(*res);
out_err_free_key:
if(key.len > 0)
kfree(key.data);
@@ -152,9 +153,8 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
const void *end = (const void *)((const char *)p + len);
struct spkm3_ctx *ctx;
- if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+ if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
goto out_err;
- memset(ctx, 0, sizeof(*ctx));
p = simple_get_netobj(p, end, &ctx->ctx_id);
if (IS_ERR(p))
@@ -201,13 +201,13 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
ctx_id->internal_ctx_id = ctx;
- dprintk("Succesfully imported new spkm context.\n");
+ dprintk("Successfully imported new spkm context.\n");
return 0;
out_err_free_key2:
- crypto_free_tfm(ctx->derived_integ_key);
+ crypto_free_blkcipher(ctx->derived_integ_key);
out_err_free_key1:
- crypto_free_tfm(ctx->derived_conf_key);
+ crypto_free_blkcipher(ctx->derived_conf_key);
out_err_free_s_key:
kfree(ctx->share_key.data);
out_err_free_mech:
@@ -224,8 +224,8 @@ static void
gss_delete_sec_context_spkm3(void *internal_ctx) {
struct spkm3_ctx *sctx = internal_ctx;
- crypto_free_tfm(sctx->derived_integ_key);
- crypto_free_tfm(sctx->derived_conf_key);
+ crypto_free_blkcipher(sctx->derived_integ_key);
+ crypto_free_blkcipher(sctx->derived_conf_key);
kfree(sctx->share_key.data);
kfree(sctx->mech_used.data);
kfree(sctx);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index af0d7ce74686..854a983ccf26 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -90,10 +90,9 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
int
decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
{
- if (!(out->data = kmalloc(explen,GFP_KERNEL)))
+ if (!(out->data = kzalloc(explen,GFP_KERNEL)))
return 0;
out->len = explen;
- memset(out->data, 0, explen);
memcpy(out->data, in, enclen);
return 1;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d51e316c5821..638c0b576203 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -425,6 +425,7 @@ static int rsc_parse(struct cache_detail *cd,
struct rsc rsci, *rscp = NULL;
time_t expiry;
int status = -EINVAL;
+ struct gss_api_mech *gm = NULL;
memset(&rsci, 0, sizeof(rsci));
/* context handle */
@@ -453,7 +454,6 @@ static int rsc_parse(struct cache_detail *cd,
set_bit(CACHE_NEGATIVE, &rsci.h.flags);
else {
int N, i;
- struct gss_api_mech *gm;
/* gid */
if (get_int(&mesg, &rsci.cred.cr_gid))
@@ -488,21 +488,17 @@ static int rsc_parse(struct cache_detail *cd,
status = -EINVAL;
/* mech-specific data: */
len = qword_get(&mesg, buf, mlen);
- if (len < 0) {
- gss_mech_put(gm);
+ if (len < 0)
goto out;
- }
status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
- if (status) {
- gss_mech_put(gm);
+ if (status)
goto out;
- }
- gss_mech_put(gm);
}
rsci.h.expiry_time = expiry;
rscp = rsc_update(&rsci, rscp);
status = 0;
out:
+ gss_mech_put(gm);
rsc_free(&rsci);
if (rscp)
cache_put(&rscp->h, &rsc_cache);
@@ -611,7 +607,7 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
if (argv->iov_len < 4)
return -1;
- o->len = ntohl(svc_getu32(argv));
+ o->len = svc_getnl(argv);
l = round_up_to_quad(o->len);
if (argv->iov_len < l)
return -1;
@@ -624,17 +620,17 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
static inline int
svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
{
- u32 *p;
+ u8 *p;
if (resv->iov_len + 4 > PAGE_SIZE)
return -1;
- svc_putu32(resv, htonl(o->len));
+ svc_putnl(resv, o->len);
p = resv->iov_base + resv->iov_len;
resv->iov_len += round_up_to_quad(o->len);
if (resv->iov_len > PAGE_SIZE)
return -1;
memcpy(p, o->data, o->len);
- memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len);
+ memset(p + o->len, 0, round_up_to_quad(o->len) - o->len);
return 0;
}
@@ -644,7 +640,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
*/
static int
gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
- u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp)
+ __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
{
struct gss_ctx *ctx_id = rsci->mechctx;
struct xdr_buf rpchdr;
@@ -661,7 +657,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
*authp = rpc_autherr_badverf;
if (argv->iov_len < 4)
return SVC_DENIED;
- flavor = ntohl(svc_getu32(argv));
+ flavor = svc_getnl(argv);
if (flavor != RPC_AUTH_GSS)
return SVC_DENIED;
if (svc_safe_getnetobj(argv, &checksum))
@@ -691,9 +687,9 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
static int
gss_write_null_verf(struct svc_rqst *rqstp)
{
- u32 *p;
+ __be32 *p;
- svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL));
+ svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL);
p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
/* don't really need to check if head->iov_len > PAGE_SIZE ... */
*p++ = 0;
@@ -705,14 +701,14 @@ gss_write_null_verf(struct svc_rqst *rqstp)
static int
gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
{
- u32 xdr_seq;
+ __be32 xdr_seq;
u32 maj_stat;
struct xdr_buf verf_data;
struct xdr_netobj mic;
- u32 *p;
+ __be32 *p;
struct kvec iov;
- svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS));
+ svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
xdr_seq = htonl(seq);
iov.iov_base = &xdr_seq;
@@ -786,7 +782,7 @@ EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
static inline int
read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
{
- u32 raw;
+ __be32 raw;
int status;
status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
@@ -809,7 +805,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
struct xdr_netobj mic;
struct xdr_buf integ_buf;
- integ_len = ntohl(svc_getu32(&buf->head[0]));
+ integ_len = svc_getnl(&buf->head[0]);
if (integ_len & 3)
goto out;
if (integ_len > buf->len)
@@ -829,19 +825,87 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
if (maj_stat != GSS_S_COMPLETE)
goto out;
- if (ntohl(svc_getu32(&buf->head[0])) != seq)
+ if (svc_getnl(&buf->head[0]) != seq)
goto out;
stat = 0;
out:
return stat;
}
+static inline int
+total_buf_len(struct xdr_buf *buf)
+{
+ return buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len;
+}
+
+static void
+fix_priv_head(struct xdr_buf *buf, int pad)
+{
+ if (buf->page_len == 0) {
+ /* We need to adjust head and buf->len in tandem in this
+ * case to make svc_defer() work--it finds the original
+ * buffer start using buf->len - buf->head[0].iov_len. */
+ buf->head[0].iov_len -= pad;
+ }
+}
+
+static int
+unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+{
+ u32 priv_len, maj_stat;
+ int pad, saved_len, remaining_len, offset;
+
+ rqstp->rq_sendfile_ok = 0;
+
+ priv_len = svc_getnl(&buf->head[0]);
+ if (rqstp->rq_deferred) {
+ /* Already decrypted last time through! The sequence number
+ * check at out_seq is unnecessary but harmless: */
+ goto out_seq;
+ }
+ /* buf->len is the number of bytes from the original start of the
+ * request to the end, where head[0].iov_len is just the bytes
+ * not yet read from the head, so these two values are different: */
+ remaining_len = total_buf_len(buf);
+ if (priv_len > remaining_len)
+ return -EINVAL;
+ pad = remaining_len - priv_len;
+ buf->len -= pad;
+ fix_priv_head(buf, pad);
+
+ /* Maybe it would be better to give gss_unwrap a length parameter: */
+ saved_len = buf->len;
+ buf->len = priv_len;
+ maj_stat = gss_unwrap(ctx, 0, buf);
+ pad = priv_len - buf->len;
+ buf->len = saved_len;
+ buf->len -= pad;
+ /* The upper layers assume the buffer is aligned on 4-byte boundaries.
+ * In the krb5p case, at least, the data ends up offset, so we need to
+ * move it around. */
+ /* XXX: This is very inefficient. It would be better to either do
+ * this while we encrypt, or maybe in the receive code, if we can peak
+ * ahead and work out the service and mechanism there. */
+ offset = buf->head[0].iov_len % 4;
+ if (offset) {
+ buf->buflen = RPCSVC_MAXPAYLOAD;
+ xdr_shift_buf(buf, offset);
+ fix_priv_head(buf, pad);
+ }
+ if (maj_stat != GSS_S_COMPLETE)
+ return -EINVAL;
+out_seq:
+ if (svc_getnl(&buf->head[0]) != seq)
+ return -EINVAL;
+ return 0;
+}
+
struct gss_svc_data {
/* decoded gss client cred: */
struct rpc_gss_wire_cred clcred;
/* pointer to the beginning of the procedure-specific results,
* which may be encrypted/checksummed in svcauth_gss_release: */
- u32 *body_start;
+ __be32 *body_start;
struct rsc *rsci;
};
@@ -882,7 +946,7 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
* response here and return SVC_COMPLETE.
*/
static int
-svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -892,8 +956,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
struct rpc_gss_wire_cred *gc;
struct rsc *rsci = NULL;
struct rsi *rsip, rsikey;
- u32 *rpcstart;
- u32 *reject_stat = resv->iov_base + resv->iov_len;
+ __be32 *rpcstart;
+ __be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len);
@@ -921,12 +985,12 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
if (argv->iov_len < 5 * 4)
goto auth_err;
- crlen = ntohl(svc_getu32(argv));
- if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION)
+ crlen = svc_getnl(argv);
+ if (svc_getnl(argv) != RPC_GSS_VERSION)
goto auth_err;
- gc->gc_proc = ntohl(svc_getu32(argv));
- gc->gc_seq = ntohl(svc_getu32(argv));
- gc->gc_svc = ntohl(svc_getu32(argv));
+ gc->gc_proc = svc_getnl(argv);
+ gc->gc_seq = svc_getnl(argv);
+ gc->gc_svc = svc_getnl(argv);
if (svc_safe_getnetobj(argv, &gc->gc_ctx))
goto auth_err;
if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
@@ -952,9 +1016,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
case RPC_GSS_PROC_CONTINUE_INIT:
if (argv->iov_len < 2 * 4)
goto auth_err;
- if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL)
+ if (svc_getnl(argv) != RPC_AUTH_NULL)
goto auth_err;
- if (ntohl(svc_getu32(argv)) != 0)
+ if (svc_getnl(argv) != 0)
goto auth_err;
break;
case RPC_GSS_PROC_DATA:
@@ -1012,14 +1076,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
goto drop;
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
- svc_putu32(resv, rpc_success);
+ svc_putnl(resv, RPC_SUCCESS);
if (svc_safe_putnetobj(resv, &rsip->out_handle))
goto drop;
if (resv->iov_len + 3 * 4 > PAGE_SIZE)
goto drop;
- svc_putu32(resv, htonl(rsip->major_status));
- svc_putu32(resv, htonl(rsip->minor_status));
- svc_putu32(resv, htonl(GSS_SEQ_WIN));
+ svc_putnl(resv, rsip->major_status);
+ svc_putnl(resv, rsip->minor_status);
+ svc_putnl(resv, GSS_SEQ_WIN);
if (svc_safe_putnetobj(resv, &rsip->out_token))
goto drop;
rqstp->rq_client = NULL;
@@ -1029,7 +1093,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
set_bit(CACHE_NEGATIVE, &rsci->h.flags);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
- svc_putu32(resv, rpc_success);
+ svc_putnl(resv, RPC_SUCCESS);
goto complete;
case RPC_GSS_PROC_DATA:
*authp = rpcsec_gsserr_ctxproblem;
@@ -1047,11 +1111,18 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
goto auth_err;
/* placeholders for length and seq. number: */
svcdata->body_start = resv->iov_base + resv->iov_len;
- svc_putu32(resv, 0);
- svc_putu32(resv, 0);
+ svc_putnl(resv, 0);
+ svc_putnl(resv, 0);
break;
case RPC_GSS_SVC_PRIVACY:
- /* currently unsupported */
+ if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
+ gc->gc_seq, rsci->mechctx))
+ goto auth_err;
+ /* placeholders for length and seq. number: */
+ svcdata->body_start = resv->iov_base + resv->iov_len;
+ svc_putnl(resv, 0);
+ svc_putnl(resv, 0);
+ break;
default:
goto auth_err;
}
@@ -1076,8 +1147,8 @@ out:
return ret;
}
-static int
-svcauth_gss_release(struct svc_rqst *rqstp)
+static inline int
+svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
{
struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
struct rpc_gss_wire_cred *gc = &gsd->clcred;
@@ -1085,73 +1156,151 @@ svcauth_gss_release(struct svc_rqst *rqstp)
struct xdr_buf integ_buf;
struct xdr_netobj mic;
struct kvec *resv;
- u32 *p;
+ __be32 *p;
int integ_offset, integ_len;
int stat = -EINVAL;
+ p = gsd->body_start;
+ gsd->body_start = NULL;
+ /* move accept_stat to right place: */
+ memcpy(p, p + 2, 4);
+ /* Don't wrap in failure case: */
+ /* Counting on not getting here if call was not even accepted! */
+ if (*p != rpc_success) {
+ resbuf->head[0].iov_len -= 2 * 4;
+ goto out;
+ }
+ p++;
+ integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
+ integ_len = resbuf->len - integ_offset;
+ BUG_ON(integ_len % 4);
+ *p++ = htonl(integ_len);
+ *p++ = htonl(gc->gc_seq);
+ if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
+ integ_len))
+ BUG();
+ if (resbuf->page_len == 0
+ && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE
+ < PAGE_SIZE) {
+ BUG_ON(resbuf->tail[0].iov_len);
+ /* Use head for everything */
+ resv = &resbuf->head[0];
+ } else if (resbuf->tail[0].iov_base == NULL) {
+ if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ goto out_err;
+ resbuf->tail[0].iov_base = resbuf->head[0].iov_base
+ + resbuf->head[0].iov_len;
+ resbuf->tail[0].iov_len = 0;
+ rqstp->rq_restailpage = 0;
+ resv = &resbuf->tail[0];
+ } else {
+ resv = &resbuf->tail[0];
+ }
+ mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
+ if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
+ goto out_err;
+ svc_putnl(resv, mic.len);
+ memset(mic.data + mic.len, 0,
+ round_up_to_quad(mic.len) - mic.len);
+ resv->iov_len += XDR_QUADLEN(mic.len) << 2;
+ /* not strictly required: */
+ resbuf->len += XDR_QUADLEN(mic.len) << 2;
+ BUG_ON(resv->iov_len > PAGE_SIZE);
+out:
+ stat = 0;
+out_err:
+ return stat;
+}
+
+static inline int
+svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
+{
+ struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct rpc_gss_wire_cred *gc = &gsd->clcred;
+ struct xdr_buf *resbuf = &rqstp->rq_res;
+ struct page **inpages = NULL;
+ __be32 *p, *len;
+ int offset;
+ int pad;
+
+ p = gsd->body_start;
+ gsd->body_start = NULL;
+ /* move accept_stat to right place: */
+ memcpy(p, p + 2, 4);
+ /* Don't wrap in failure case: */
+ /* Counting on not getting here if call was not even accepted! */
+ if (*p != rpc_success) {
+ resbuf->head[0].iov_len -= 2 * 4;
+ return 0;
+ }
+ p++;
+ len = p++;
+ offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
+ *p++ = htonl(gc->gc_seq);
+ inpages = resbuf->pages;
+ /* XXX: Would be better to write some xdr helper functions for
+ * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
+ if (resbuf->tail[0].iov_base && rqstp->rq_restailpage == 0) {
+ BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
+ + PAGE_SIZE);
+ BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base);
+ if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len
+ + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ return -ENOMEM;
+ memmove(resbuf->tail[0].iov_base + RPC_MAX_AUTH_SIZE,
+ resbuf->tail[0].iov_base,
+ resbuf->tail[0].iov_len);
+ resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ }
+ if (resbuf->tail[0].iov_base == NULL) {
+ if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ return -ENOMEM;
+ resbuf->tail[0].iov_base = resbuf->head[0].iov_base
+ + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE;
+ resbuf->tail[0].iov_len = 0;
+ rqstp->rq_restailpage = 0;
+ }
+ if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages))
+ return -ENOMEM;
+ *len = htonl(resbuf->len - offset);
+ pad = 3 - ((resbuf->len - offset - 1)&3);
+ p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
+ memset(p, 0, pad);
+ resbuf->tail[0].iov_len += pad;
+ resbuf->len += pad;
+ return 0;
+}
+
+static int
+svcauth_gss_release(struct svc_rqst *rqstp)
+{
+ struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct rpc_gss_wire_cred *gc = &gsd->clcred;
+ struct xdr_buf *resbuf = &rqstp->rq_res;
+ int stat = -EINVAL;
+
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
/* Release can be called twice, but we only wrap once. */
if (gsd->body_start == NULL)
goto out;
/* normally not set till svc_send, but we need it here: */
- resbuf->len = resbuf->head[0].iov_len
- + resbuf->page_len + resbuf->tail[0].iov_len;
+ /* XXX: what for? Do we mess it up the moment we call svc_putu32
+ * or whatever? */
+ resbuf->len = total_buf_len(resbuf);
switch (gc->gc_svc) {
case RPC_GSS_SVC_NONE:
break;
case RPC_GSS_SVC_INTEGRITY:
- p = gsd->body_start;
- gsd->body_start = NULL;
- /* move accept_stat to right place: */
- memcpy(p, p + 2, 4);
- /* don't wrap in failure case: */
- /* Note: counting on not getting here if call was not even
- * accepted! */
- if (*p != rpc_success) {
- resbuf->head[0].iov_len -= 2 * 4;
- goto out;
- }
- p++;
- integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
- integ_len = resbuf->len - integ_offset;
- BUG_ON(integ_len % 4);
- *p++ = htonl(integ_len);
- *p++ = htonl(gc->gc_seq);
- if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
- integ_len))
- BUG();
- if (resbuf->page_len == 0
- && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE
- < PAGE_SIZE) {
- BUG_ON(resbuf->tail[0].iov_len);
- /* Use head for everything */
- resv = &resbuf->head[0];
- } else if (resbuf->tail[0].iov_base == NULL) {
- if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE
- > PAGE_SIZE)
- goto out_err;
- resbuf->tail[0].iov_base =
- resbuf->head[0].iov_base
- + resbuf->head[0].iov_len;
- resbuf->tail[0].iov_len = 0;
- rqstp->rq_restailpage = 0;
- resv = &resbuf->tail[0];
- } else {
- resv = &resbuf->tail[0];
- }
- mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
- if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
+ stat = svcauth_gss_wrap_resp_integ(rqstp);
+ if (stat)
goto out_err;
- svc_putu32(resv, htonl(mic.len));
- memset(mic.data + mic.len, 0,
- round_up_to_quad(mic.len) - mic.len);
- resv->iov_len += XDR_QUADLEN(mic.len) << 2;
- /* not strictly required: */
- resbuf->len += XDR_QUADLEN(mic.len) << 2;
- BUG_ON(resv->iov_len > PAGE_SIZE);
break;
case RPC_GSS_SVC_PRIVACY:
+ stat = svcauth_gss_wrap_resp_priv(rqstp);
+ if (stat)
+ goto out_err;
+ break;
default:
goto out_err;
}
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index f56767aaa927..3be257dc32b2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -60,8 +60,8 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
/*
* Marshal credential.
*/
-static u32 *
-nul_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+nul_marshal(struct rpc_task *task, __be32 *p)
{
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
@@ -81,8 +81,8 @@ nul_refresh(struct rpc_task *task)
return 0;
}
-static u32 *
-nul_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+nul_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
@@ -118,6 +118,8 @@ struct rpc_auth null_auth = {
.au_cslack = 4,
.au_rslack = 2,
.au_ops = &authnull_ops,
+ .au_flavor = RPC_AUTH_NULL,
+ .au_count = ATOMIC_INIT(0),
};
static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index df14b6bfbf10..f7f990c9afe2 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -137,12 +137,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
-static u32 *
-unx_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
- u32 *base, *hold;
+ __be32 *base, *hold;
int i;
*p++ = htonl(RPC_AUTH_UNIX);
@@ -178,8 +178,8 @@ unx_refresh(struct rpc_task *task)
return 0;
}
-static u32 *
-unx_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+unx_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
@@ -225,6 +225,7 @@ struct rpc_auth unix_auth = {
.au_cslack = UNX_WRITESLACK,
.au_rslack = 2, /* assume AUTH_NULL verf */
.au_ops = &authunix_ops,
+ .au_flavor = RPC_AUTH_UNIX,
.au_count = ATOMIC_INIT(0),
.au_credcache = &unix_cred_cache,
};
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7026b0866b7b..00cb388ece03 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -71,7 +71,12 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
new = detail->alloc();
if (!new)
return NULL;
+ /* must fully initialise 'new', else
+ * we might get lose if we need to
+ * cache_put it soon.
+ */
cache_init(new);
+ detail->init(new, key);
write_lock(&detail->hash_lock);
@@ -85,7 +90,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
return tmp;
}
}
- detail->init(new, key);
new->next = *head;
*head = new;
detail->entries++;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index aa8965e9d307..124ff0ceb55b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -60,8 +60,8 @@ static void call_refreshresult(struct rpc_task *task);
static void call_timeout(struct rpc_task *task);
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *task);
-static u32 * call_header(struct rpc_task *task);
-static u32 * call_verify(struct rpc_task *task);
+static __be32 * call_header(struct rpc_task *task);
+static __be32 * call_verify(struct rpc_task *task);
static int
@@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
}
}
-/*
- * Create an RPC client
- * FIXME: This should also take a flags argument (as in task->tk_flags).
- * It's called (among others) from pmap_create_client, which may in
- * turn be called by an async task. In this case, rpciod should not be
- * made to sleep too long.
- */
-struct rpc_clnt *
-rpc_new_client(struct rpc_xprt *xprt, char *servname,
- struct rpc_program *program, u32 vers,
- rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
{
struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
@@ -125,10 +115,9 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
goto out_err;
err = -ENOMEM;
- clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
+ clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
- memset(clnt, 0, sizeof(*clnt));
atomic_set(&clnt->cl_users, 0);
atomic_set(&clnt->cl_count, 1);
clnt->cl_parent = clnt;
@@ -148,16 +137,12 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_protname = program->name;
- clnt->cl_pmap = &clnt->cl_pmap_default;
- clnt->cl_port = xprt->addr.sin_port;
clnt->cl_prog = program->number;
clnt->cl_vers = version->number;
- clnt->cl_prot = xprt->prot;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
- rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
- if (!clnt->cl_port)
+ if (!xprt_bound(clnt->cl_xprt))
clnt->cl_autobind = 1;
clnt->cl_rtt = &clnt->cl_rtt_default;
@@ -184,8 +169,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
out_no_auth:
if (!IS_ERR(clnt->cl_dentry)) {
- rpc_rmdir(clnt->cl_pathname);
- dput(clnt->cl_dentry);
+ rpc_rmdir(clnt->cl_dentry);
rpc_put_mount();
}
out_no_path:
@@ -193,40 +177,71 @@ out_no_path:
kfree(clnt->cl_server);
kfree(clnt);
out_err:
- xprt_destroy(xprt);
+ xprt_put(xprt);
out_no_xprt:
return ERR_PTR(err);
}
-/**
- * Create an RPC client
- * @xprt - pointer to xprt struct
- * @servname - name of server
- * @info - rpc_program
- * @version - rpc_program version
- * @authflavor - rpc_auth flavour to use
+/*
+ * rpc_create - create an RPC client and transport with one call
+ * @args: rpc_clnt create argument structure
*
- * Creates an RPC client structure, then pings the server in order to
- * determine if it is up, and if it supports this program and version.
+ * Creates and initializes an RPC transport and an RPC client.
*
- * This function should never be called by asynchronous tasks such as
- * the portmapper.
+ * It can ping the server in order to determine if it is up, and to see if
+ * it supports this program and version. RPC_CLNT_CREATE_NOPING disables
+ * this behavior so asynchronous tasks can also use rpc_create.
*/
-struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
- struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
+struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
+ struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
- int err;
-
- clnt = rpc_new_client(xprt, servname, info, version, authflavor);
+
+ xprt = xprt_create_transport(args->protocol, args->address,
+ args->addrsize, args->timeout);
+ if (IS_ERR(xprt))
+ return (struct rpc_clnt *)xprt;
+
+ /*
+ * By default, kernel RPC client connects from a reserved port.
+ * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
+ * but it is always enabled for rpciod, which handles the connect
+ * operation.
+ */
+ xprt->resvport = 1;
+ if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
+ xprt->resvport = 0;
+
+ dprintk("RPC: creating %s client for %s (xprt %p)\n",
+ args->program->name, args->servername, xprt);
+
+ clnt = rpc_new_client(xprt, args->servername, args->program,
+ args->version, args->authflavor);
if (IS_ERR(clnt))
return clnt;
- err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
- if (err == 0)
- return clnt;
- rpc_shutdown_client(clnt);
- return ERR_PTR(err);
+
+ if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
+ int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
+ }
+
+ clnt->cl_softrtry = 1;
+ if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+ clnt->cl_softrtry = 0;
+
+ if (args->flags & RPC_CLNT_CREATE_INTR)
+ clnt->cl_intr = 1;
+ if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+ clnt->cl_autobind = 1;
+ if (args->flags & RPC_CLNT_CREATE_ONESHOT)
+ clnt->cl_oneshot = 1;
+
+ return clnt;
}
+EXPORT_SYMBOL_GPL(rpc_create);
/*
* This function clones the RPC client structure. It allows us to share the
@@ -246,21 +261,17 @@ rpc_clone_client(struct rpc_clnt *clnt)
atomic_set(&new->cl_users, 0);
new->cl_parent = clnt;
atomic_inc(&clnt->cl_count);
- /* Duplicate portmapper */
- rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+ new->cl_xprt = xprt_get(clnt->cl_xprt);
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_oneshot = 0;
new->cl_dead = 0;
- if (!IS_ERR(new->cl_dentry)) {
+ if (!IS_ERR(new->cl_dentry))
dget(new->cl_dentry);
- rpc_get_mount();
- }
rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
- new->cl_pmap = &new->cl_pmap_default;
- new->cl_metrics = rpc_alloc_iostats(clnt);
+ new->cl_metrics = rpc_alloc_iostats(clnt);
return new;
out_no_clnt:
printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -318,24 +329,21 @@ rpc_destroy_client(struct rpc_clnt *clnt)
clnt->cl_auth = NULL;
}
if (clnt->cl_parent != clnt) {
+ if (!IS_ERR(clnt->cl_dentry))
+ dput(clnt->cl_dentry);
rpc_destroy_client(clnt->cl_parent);
goto out_free;
}
- if (clnt->cl_pathname[0])
- rpc_rmdir(clnt->cl_pathname);
- if (clnt->cl_xprt) {
- xprt_destroy(clnt->cl_xprt);
- clnt->cl_xprt = NULL;
+ if (!IS_ERR(clnt->cl_dentry)) {
+ rpc_rmdir(clnt->cl_dentry);
+ rpc_put_mount();
}
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
- if (!IS_ERR(clnt->cl_dentry)) {
- dput(clnt->cl_dentry);
- rpc_put_mount();
- }
+ xprt_put(clnt->cl_xprt);
kfree(clnt);
return 0;
}
@@ -544,6 +552,40 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
task->tk_action = rpc_exit_task;
}
+/**
+ * rpc_peeraddr - extract remote peer address from clnt's xprt
+ * @clnt: RPC client structure
+ * @buf: target buffer
+ * @size: length of target buffer
+ *
+ * Returns the number of bytes that are actually in the stored address.
+ */
+size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
+{
+ size_t bytes;
+ struct rpc_xprt *xprt = clnt->cl_xprt;
+
+ bytes = sizeof(xprt->addr);
+ if (bytes > bufsize)
+ bytes = bufsize;
+ memcpy(buf, &clnt->cl_xprt->addr, bytes);
+ return xprt->addrlen;
+}
+EXPORT_SYMBOL_GPL(rpc_peeraddr);
+
+/**
+ * rpc_peeraddr2str - return remote peer address in printable format
+ * @clnt: RPC client structure
+ * @format: address format
+ *
+ */
+char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+{
+ struct rpc_xprt *xprt = clnt->cl_xprt;
+ return xprt->ops->print_addr(xprt, format);
+}
+EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
+
void
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
{
@@ -564,7 +606,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
{
return clnt->cl_xprt->max_payload;
}
-EXPORT_SYMBOL(rpc_max_payload);
+EXPORT_SYMBOL_GPL(rpc_max_payload);
/**
* rpc_force_rebind - force transport to check that remote port is unchanged
@@ -574,9 +616,9 @@ EXPORT_SYMBOL(rpc_max_payload);
void rpc_force_rebind(struct rpc_clnt *clnt)
{
if (clnt->cl_autobind)
- clnt->cl_port = 0;
+ xprt_clear_bound(clnt->cl_xprt);
}
-EXPORT_SYMBOL(rpc_force_rebind);
+EXPORT_SYMBOL_GPL(rpc_force_rebind);
/*
* Restart an (async) RPC call. Usually called from within the
@@ -740,7 +782,7 @@ call_encode(struct rpc_task *task)
struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
unsigned int bufsiz;
kxdrproc_t encode;
- u32 *p;
+ __be32 *p;
dprintk("RPC: %4d call_encode (status %d)\n",
task->tk_pid, task->tk_status);
@@ -785,16 +827,16 @@ call_encode(struct rpc_task *task)
static void
call_bind(struct rpc_task *task)
{
- struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_xprt *xprt = task->tk_xprt;
dprintk("RPC: %4d call_bind (status %d)\n",
task->tk_pid, task->tk_status);
task->tk_action = call_connect;
- if (!clnt->cl_port) {
+ if (!xprt_bound(xprt)) {
task->tk_action = call_bind_status;
- task->tk_timeout = task->tk_xprt->bind_timeout;
- rpc_getport(task, clnt);
+ task->tk_timeout = xprt->bind_timeout;
+ xprt->ops->rpcbind(task);
}
}
@@ -819,15 +861,11 @@ call_bind_status(struct rpc_task *task)
dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
task->tk_pid);
rpc_delay(task, 3*HZ);
- goto retry_bind;
+ goto retry_timeout;
case -ETIMEDOUT:
dprintk("RPC: %4d rpcbind request timed out\n",
task->tk_pid);
- if (RPC_IS_SOFT(task)) {
- status = -EIO;
- break;
- }
- goto retry_bind;
+ goto retry_timeout;
case -EPFNOSUPPORT:
dprintk("RPC: %4d remote rpcbind service unavailable\n",
task->tk_pid);
@@ -840,16 +878,13 @@ call_bind_status(struct rpc_task *task)
dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
status = -EIO;
- break;
}
rpc_exit(task, status);
return;
-retry_bind:
- task->tk_status = 0;
- task->tk_action = call_bind;
- return;
+retry_timeout:
+ task->tk_action = call_timeout;
}
/*
@@ -897,14 +932,16 @@ call_connect_status(struct rpc_task *task)
switch (status) {
case -ENOTCONN:
- case -ETIMEDOUT:
case -EAGAIN:
task->tk_action = call_bind;
- break;
- default:
- rpc_exit(task, -EIO);
- break;
+ if (!RPC_IS_SOFT(task))
+ return;
+ /* if soft mounted, test if we've timed out */
+ case -ETIMEDOUT:
+ task->tk_action = call_timeout;
+ return;
}
+ rpc_exit(task, -EIO);
}
/*
@@ -922,26 +959,43 @@ call_transmit(struct rpc_task *task)
task->tk_status = xprt_prepare_transmit(task);
if (task->tk_status != 0)
return;
+ task->tk_action = call_transmit_status;
/* Encode here so that rpcsec_gss can use correct sequence number. */
if (rpc_task_need_encode(task)) {
- task->tk_rqstp->rq_bytes_sent = 0;
+ BUG_ON(task->tk_rqstp->rq_bytes_sent != 0);
call_encode(task);
/* Did the encode result in an error condition? */
if (task->tk_status != 0)
- goto out_nosend;
+ return;
}
- task->tk_action = call_transmit_status;
xprt_transmit(task);
if (task->tk_status < 0)
return;
- if (!task->tk_msg.rpc_proc->p_decode) {
- task->tk_action = rpc_exit_task;
- rpc_wake_up_task(task);
- }
- return;
-out_nosend:
- /* release socket write lock before attempting to handle error */
- xprt_abort_transmit(task);
+ /*
+ * On success, ensure that we call xprt_end_transmit() before sleeping
+ * in order to allow access to the socket to other RPC requests.
+ */
+ call_transmit_status(task);
+ if (task->tk_msg.rpc_proc->p_decode != NULL)
+ return;
+ task->tk_action = rpc_exit_task;
+ rpc_wake_up_task(task);
+}
+
+/*
+ * 5a. Handle cleanup after a transmission
+ */
+static void
+call_transmit_status(struct rpc_task *task)
+{
+ task->tk_action = call_status;
+ /*
+ * Special case: if we've been waiting on the socket's write_space()
+ * callback, then don't call xprt_end_transmit().
+ */
+ if (task->tk_status == -EAGAIN)
+ return;
+ xprt_end_transmit(task);
rpc_task_force_reencode(task);
}
@@ -969,6 +1023,14 @@ call_status(struct rpc_task *task)
task->tk_status = 0;
switch(status) {
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ /*
+ * Delay any retries for 3 seconds, then handle as if it
+ * were a timeout.
+ */
+ rpc_delay(task, 3*HZ);
case -ETIMEDOUT:
task->tk_action = call_timeout;
break;
@@ -988,23 +1050,11 @@ call_status(struct rpc_task *task)
printk("%s: RPC call returned error %d\n",
clnt->cl_protname, -status);
rpc_exit(task, status);
- break;
}
}
/*
- * 6a. Handle transmission errors.
- */
-static void
-call_transmit_status(struct rpc_task *task)
-{
- if (task->tk_status != -EAGAIN)
- rpc_task_force_reencode(task);
- call_status(task);
-}
-
-/*
- * 6b. Handle RPC timeout
+ * 6a. Handle RPC timeout
* We do not release the request slot, so we keep using the
* same XID for all retransmits.
*/
@@ -1050,7 +1100,7 @@ call_decode(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode;
- u32 *p;
+ __be32 *p;
dprintk("RPC: %4d call_decode (status %d)\n",
task->tk_pid, task->tk_status);
@@ -1067,10 +1117,10 @@ call_decode(struct rpc_task *task)
clnt->cl_stats->rpcretrans++;
goto out_retry;
}
- printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
+ dprintk("%s: too small RPC reply size (%d bytes)\n",
clnt->cl_protname, task->tk_status);
- rpc_exit(task, -EIO);
- return;
+ task->tk_action = call_timeout;
+ goto out_retry;
}
/*
@@ -1147,12 +1197,12 @@ call_refreshresult(struct rpc_task *task)
/*
* Call header serialization
*/
-static u32 *
+static __be32 *
call_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- u32 *p = req->rq_svec[0].iov_base;
+ __be32 *p = req->rq_svec[0].iov_base;
/* FIXME: check buffer size? */
@@ -1171,14 +1221,26 @@ call_header(struct rpc_task *task)
/*
* Reply header verification
*/
-static u32 *
+static __be32 *
call_verify(struct rpc_task *task)
{
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
- u32 *p = iov->iov_base, n;
+ __be32 *p = iov->iov_base;
+ u32 n;
int error = -EACCES;
+ if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
+ /* RFC-1014 says that the representation of XDR data must be a
+ * multiple of four bytes
+ * - if it isn't pointer subtraction in the NFS client may give
+ * undefined results
+ */
+ printk(KERN_WARNING
+ "call_verify: XDR representation not a multiple of"
+ " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
+ goto out_eio;
+ }
if ((len -= 3) < 0)
goto out_overflow;
p += 1; /* skip XID */
@@ -1242,7 +1304,7 @@ call_verify(struct rpc_task *task)
printk(KERN_WARNING "call_verify: auth check failed\n");
goto out_garbage; /* bad verifier, retry */
}
- len = p - (u32 *)iov->iov_base - 1;
+ len = p - (__be32 *)iov->iov_base - 1;
if (len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
@@ -1297,12 +1359,12 @@ out_overflow:
goto out_garbage;
}
-static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
+static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
{
return 0;
}
-static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
{
return 0;
}
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index d25b054ec921..919d5ba7ca0a 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -1,12 +1,13 @@
/*
- * linux/net/sunrpc/pmap.c
+ * linux/net/sunrpc/pmap_clnt.c
*
- * Portmapper client.
+ * In-kernel RPC portmapper client.
+ *
+ * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
@@ -14,7 +15,6 @@
#include <linux/uio.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#ifdef RPC_DEBUG
@@ -25,80 +25,141 @@
#define PMAP_UNSET 2
#define PMAP_GETPORT 3
+struct portmap_args {
+ u32 pm_prog;
+ u32 pm_vers;
+ u32 pm_prot;
+ unsigned short pm_port;
+ struct rpc_xprt * pm_xprt;
+};
+
static struct rpc_procinfo pmap_procedures[];
static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
-static void pmap_getport_done(struct rpc_task *);
+static void pmap_getport_done(struct rpc_task *, void *);
static struct rpc_program pmap_program;
-static DEFINE_SPINLOCK(pmap_lock);
-/*
- * Obtain the port for a given RPC service on a given host. This one can
- * be called for an ongoing RPC request.
- */
-void
-rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
+static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
{
- struct rpc_portmap *map = clnt->cl_pmap;
- struct sockaddr_in *sap = &clnt->cl_xprt->addr;
+ struct portmap_args *map = calldata;
struct rpc_message msg = {
.rpc_proc = &pmap_procedures[PMAP_GETPORT],
.rpc_argp = map,
- .rpc_resp = &clnt->cl_port,
- .rpc_cred = NULL
+ .rpc_resp = &map->pm_port,
};
+
+ rpc_call_setup(task, &msg, 0);
+}
+
+static inline struct portmap_args *pmap_map_alloc(void)
+{
+ return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
+}
+
+static inline void pmap_map_free(struct portmap_args *map)
+{
+ kfree(map);
+}
+
+static void pmap_map_release(void *data)
+{
+ pmap_map_free(data);
+}
+
+static const struct rpc_call_ops pmap_getport_ops = {
+ .rpc_call_prepare = pmap_getport_prepare,
+ .rpc_call_done = pmap_getport_done,
+ .rpc_release = pmap_map_release,
+};
+
+static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
+{
+ xprt_clear_binding(xprt);
+ rpc_wake_up_status(&xprt->binding, status);
+}
+
+/**
+ * rpc_getport - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
+ */
+void rpc_getport(struct rpc_task *task)
+{
+ struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct sockaddr_in addr;
+ struct portmap_args *map;
struct rpc_clnt *pmap_clnt;
- struct rpc_task *child;
+ struct rpc_task *child;
+ int status;
- dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
+ dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
task->tk_pid, clnt->cl_server,
- map->pm_prog, map->pm_vers, map->pm_prot);
+ clnt->cl_prog, clnt->cl_vers, xprt->prot);
/* Autobind on cloned rpc clients is discouraged */
BUG_ON(clnt->cl_parent != clnt);
- spin_lock(&pmap_lock);
- if (map->pm_binding) {
- rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
- spin_unlock(&pmap_lock);
+ if (xprt_test_and_set_binding(xprt)) {
+ task->tk_status = -EACCES; /* tell caller to check again */
+ rpc_sleep_on(&xprt->binding, task, NULL, NULL);
return;
}
- map->pm_binding = 1;
- spin_unlock(&pmap_lock);
- pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0);
- if (IS_ERR(pmap_clnt)) {
- task->tk_status = PTR_ERR(pmap_clnt);
+ /* Someone else may have bound if we slept */
+ status = 0;
+ if (xprt_bound(xprt))
+ goto bailout_nofree;
+
+ status = -ENOMEM;
+ map = pmap_map_alloc();
+ if (!map)
+ goto bailout_nofree;
+ map->pm_prog = clnt->cl_prog;
+ map->pm_vers = clnt->cl_vers;
+ map->pm_prot = xprt->prot;
+ map->pm_port = 0;
+ map->pm_xprt = xprt_get(xprt);
+
+ rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
+ pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
+ status = PTR_ERR(pmap_clnt);
+ if (IS_ERR(pmap_clnt))
goto bailout;
- }
- task->tk_status = 0;
- /*
- * Note: rpc_new_child will release client after a failure.
- */
- if (!(child = rpc_new_child(pmap_clnt, task)))
+ status = -EIO;
+ child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
+ if (IS_ERR(child))
goto bailout;
+ rpc_release_task(child);
- /* Setup the call info struct */
- rpc_call_setup(child, &msg, 0);
+ rpc_sleep_on(&xprt->binding, task, NULL, NULL);
- /* ... and run the child task */
task->tk_xprt->stat.bind_count++;
- rpc_run_child(task, child, pmap_getport_done);
return;
bailout:
- spin_lock(&pmap_lock);
- map->pm_binding = 0;
- rpc_wake_up(&map->pm_bindwait);
- spin_unlock(&pmap_lock);
- rpc_exit(task, -EIO);
+ pmap_map_free(map);
+ xprt_put(xprt);
+bailout_nofree:
+ task->tk_status = status;
+ pmap_wake_portmap_waiters(xprt, status);
}
#ifdef CONFIG_ROOT_NFS
-int
-rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
+/**
+ * rpc_getport_external - obtain the port for a given RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * This one is called from outside the RPC client in a synchronous task context.
+ */
+int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
{
- struct rpc_portmap map = {
+ struct portmap_args map = {
.pm_prog = prog,
.pm_vers = vers,
.pm_prot = prot,
@@ -113,7 +174,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
char hostname[32];
int status;
- dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
+ dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
@@ -133,45 +194,53 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
}
#endif
-static void
-pmap_getport_done(struct rpc_task *task)
+/*
+ * Portmapper child task invokes this callback via tk_exit.
+ */
+static void pmap_getport_done(struct rpc_task *child, void *data)
{
- struct rpc_clnt *clnt = task->tk_client;
- struct rpc_xprt *xprt = task->tk_xprt;
- struct rpc_portmap *map = clnt->cl_pmap;
-
- dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
- task->tk_pid, task->tk_status, clnt->cl_port);
-
- xprt->ops->set_port(xprt, 0);
- if (task->tk_status < 0) {
- /* Make the calling task exit with an error */
- task->tk_action = rpc_exit_task;
- } else if (clnt->cl_port == 0) {
- /* Program not registered */
- rpc_exit(task, -EACCES);
+ struct portmap_args *map = data;
+ struct rpc_xprt *xprt = map->pm_xprt;
+ int status = child->tk_status;
+
+ if (status < 0) {
+ /* Portmapper not available */
+ xprt->ops->set_port(xprt, 0);
+ } else if (map->pm_port == 0) {
+ /* Requested RPC service wasn't registered */
+ xprt->ops->set_port(xprt, 0);
+ status = -EACCES;
} else {
- xprt->ops->set_port(xprt, clnt->cl_port);
- clnt->cl_port = htons(clnt->cl_port);
+ /* Succeeded */
+ xprt->ops->set_port(xprt, map->pm_port);
+ xprt_set_bound(xprt);
+ status = 0;
}
- spin_lock(&pmap_lock);
- map->pm_binding = 0;
- rpc_wake_up(&map->pm_bindwait);
- spin_unlock(&pmap_lock);
+
+ dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
+ child->tk_pid, status, map->pm_port);
+
+ pmap_wake_portmap_waiters(xprt, status);
+ xprt_put(xprt);
}
-/*
- * Set or unset a port registration with the local portmapper.
+/**
+ * rpc_register - set or unset a port registration with the local portmapper
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ * @port: port value to register
+ * @okay: result code
+ *
* port == 0 means unregister, port != 0 means register.
*/
-int
-rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
{
struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
};
- struct rpc_portmap map = {
+ struct portmap_args map = {
.pm_prog = prog,
.pm_vers = vers,
.pm_prot = prot,
@@ -185,7 +254,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
struct rpc_clnt *pmap_clnt;
int error = 0;
- dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
+ dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
prog, vers, prot, port);
pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
@@ -208,38 +277,32 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
return error;
}
-static struct rpc_clnt *
-pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
+static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
{
- struct rpc_xprt *xprt;
- struct rpc_clnt *clnt;
-
- /* printk("pmap: create xprt\n"); */
- xprt = xprt_create_proto(proto, srvaddr, NULL);
- if (IS_ERR(xprt))
- return (struct rpc_clnt *)xprt;
- xprt->ops->set_port(xprt, RPC_PMAP_PORT);
+ struct rpc_create_args args = {
+ .protocol = proto,
+ .address = (struct sockaddr *)srvaddr,
+ .addrsize = sizeof(*srvaddr),
+ .servername = hostname,
+ .program = &pmap_program,
+ .version = RPC_PMAP_VERSION,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = (RPC_CLNT_CREATE_ONESHOT |
+ RPC_CLNT_CREATE_NOPING),
+ };
+
+ srvaddr->sin_port = htons(RPC_PMAP_PORT);
if (!privileged)
- xprt->resvport = 0;
-
- /* printk("pmap: create clnt\n"); */
- clnt = rpc_new_client(xprt, hostname,
- &pmap_program, RPC_PMAP_VERSION,
- RPC_AUTH_UNIX);
- if (!IS_ERR(clnt)) {
- clnt->cl_softrtry = 1;
- clnt->cl_oneshot = 1;
- }
- return clnt;
+ args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ return rpc_create(&args);
}
/*
* XDR encode/decode functions for PMAP
*/
-static int
-xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
+static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map)
{
- dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
+ dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
*p++ = htonl(map->pm_prog);
*p++ = htonl(map->pm_vers);
@@ -250,15 +313,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
return 0;
}
-static int
-xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
+static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp)
{
*portp = (unsigned short) ntohl(*p++);
return 0;
}
-static int
-xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
+static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp)
{
*boolp = (unsigned int) ntohl(*p++);
return 0;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8241fa726803..700c6e061a04 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -8,7 +8,6 @@
* Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -328,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v)
seq_printf(m, "RPC server: %s\n", clnt->cl_server);
seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
clnt->cl_prog, clnt->cl_vers);
- seq_printf(m, "address: %u.%u.%u.%u\n",
- NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
- seq_printf(m, "protocol: %s\n",
- clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+ seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+ seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
return 0;
}
@@ -439,7 +436,7 @@ struct vfsmount *rpc_get_mount(void)
{
int err;
- err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+ err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count);
if (err != 0)
return ERR_PTR(err);
return rpc_mount;
@@ -491,7 +488,6 @@ rpc_get_inode(struct super_block *sb, int mode)
return NULL;
inode->i_mode = mode;
inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch(mode & S_IFMT) {
@@ -516,7 +512,7 @@ rpc_depopulate(struct dentry *parent)
struct dentry *dentry, *dvec[10];
int n = 0;
- mutex_lock(&dir->i_mutex);
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
repeat:
spin_lock(&dcache_lock);
list_for_each_safe(pos, next, &parent->d_subdirs) {
@@ -540,6 +536,7 @@ repeat:
rpc_close_pipes(dentry->d_inode);
simple_unlink(dir, dentry);
}
+ inode_dir_notify(dir, DN_DELETE);
dput(dentry);
} while (n);
goto repeat;
@@ -611,8 +608,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
int error;
shrink_dcache_parent(dentry);
- if (dentry->d_inode)
- rpc_close_pipes(dentry->d_inode);
+ if (d_unhashed(dentry))
+ return 0;
if ((error = simple_rmdir(dir, dentry)) != 0)
return error;
if (!error) {
@@ -623,17 +620,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
}
static struct dentry *
-rpc_lookup_negative(char *path, struct nameidata *nd)
+rpc_lookup_create(struct dentry *parent, const char *name, int len)
{
+ struct inode *dir = parent->d_inode;
struct dentry *dentry;
- struct inode *dir;
- int error;
- if ((error = rpc_lookup_parent(path, nd)) != 0)
- return ERR_PTR(error);
- dir = nd->dentry->d_inode;
- mutex_lock(&dir->i_mutex);
- dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_one_len(name, parent, len);
if (IS_ERR(dentry))
goto out_err;
if (dentry->d_inode) {
@@ -644,7 +637,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
return dentry;
out_err:
mutex_unlock(&dir->i_mutex);
- rpc_release_path(nd);
+ return dentry;
+}
+
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+ struct dentry *dentry;
+ int error;
+
+ if ((error = rpc_lookup_parent(path, nd)) != 0)
+ return ERR_PTR(error);
+ dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
+ if (IS_ERR(dentry))
+ rpc_release_path(nd);
return dentry;
}
@@ -668,10 +674,11 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
RPCAUTH_info, RPCAUTH_EOF);
if (error)
goto err_depopulate;
+ dget(dentry);
out:
mutex_unlock(&dir->i_mutex);
rpc_release_path(&nd);
- return dget(dentry);
+ return dentry;
err_depopulate:
rpc_depopulate(dentry);
__rpc_rmdir(dir, dentry);
@@ -684,44 +691,35 @@ err_dput:
}
int
-rpc_rmdir(char *path)
+rpc_rmdir(struct dentry *dentry)
{
- struct nameidata nd;
- struct dentry *dentry;
+ struct dentry *parent;
struct inode *dir;
int error;
- if ((error = rpc_lookup_parent(path, &nd)) != 0)
- return error;
- dir = nd.dentry->d_inode;
- mutex_lock(&dir->i_mutex);
- dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
- if (IS_ERR(dentry)) {
- error = PTR_ERR(dentry);
- goto out_release;
- }
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
rpc_depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
dput(dentry);
-out_release:
mutex_unlock(&dir->i_mutex);
- rpc_release_path(&nd);
+ dput(parent);
return error;
}
struct dentry *
-rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
{
- struct nameidata nd;
struct dentry *dentry;
struct inode *dir, *inode;
struct rpc_inode *rpci;
- dentry = rpc_lookup_negative(path, &nd);
+ dentry = rpc_lookup_create(parent, name, strlen(name));
if (IS_ERR(dentry))
return dentry;
- dir = nd.dentry->d_inode;
- inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
+ dir = parent->d_inode;
+ inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
if (!inode)
goto err_dput;
inode->i_ino = iunique(dir->i_sb, 100);
@@ -732,45 +730,40 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
rpci->flags = flags;
rpci->ops = ops;
inode_dir_notify(dir, DN_CREATE);
+ dget(dentry);
out:
mutex_unlock(&dir->i_mutex);
- rpc_release_path(&nd);
- return dget(dentry);
+ return dentry;
err_dput:
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
- printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
- __FILE__, __FUNCTION__, path, -ENOMEM);
+ printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
+ __FILE__, __FUNCTION__, parent->d_name.name, name,
+ -ENOMEM);
goto out;
}
int
-rpc_unlink(char *path)
+rpc_unlink(struct dentry *dentry)
{
- struct nameidata nd;
- struct dentry *dentry;
+ struct dentry *parent;
struct inode *dir;
- int error;
+ int error = 0;
- if ((error = rpc_lookup_parent(path, &nd)) != 0)
- return error;
- dir = nd.dentry->d_inode;
- mutex_lock(&dir->i_mutex);
- dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
- if (IS_ERR(dentry)) {
- error = PTR_ERR(dentry);
- goto out_release;
- }
- d_drop(dentry);
- if (dentry->d_inode) {
- rpc_close_pipes(dentry->d_inode);
- error = simple_unlink(dir, dentry);
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ if (!d_unhashed(dentry)) {
+ d_drop(dentry);
+ if (dentry->d_inode) {
+ rpc_close_pipes(dentry->d_inode);
+ error = simple_unlink(dir, dentry);
+ }
+ inode_dir_notify(dir, DN_DELETE);
}
dput(dentry);
- inode_dir_notify(dir, DN_DELETE);
-out_release:
mutex_unlock(&dir->i_mutex);
- rpc_release_path(&nd);
+ dput(parent);
return error;
}
@@ -864,7 +857,6 @@ int register_rpc_pipefs(void)
void unregister_rpc_pipefs(void)
{
- if (kmem_cache_destroy(rpc_inode_cachep))
- printk(KERN_WARNING "RPC: unable to free inode cache\n");
+ kmem_cache_destroy(rpc_inode_cachep);
unregister_filesystem(&rpc_pipe_fs_type);
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5c3eee768504..a1ab4eed41f4 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,7 +21,6 @@
#include <linux/mutex.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
@@ -45,12 +44,6 @@ static void rpciod_killall(void);
static void rpc_async_schedule(void *);
/*
- * RPC tasks that create another task (e.g. for contacting the portmapper)
- * will wait on this queue for their child's completion
- */
-static RPC_WAITQ(childq, "childq");
-
-/*
* RPC tasks sit here while waiting for conditions to improve.
*/
static RPC_WAITQ(delay_queue, "delayq");
@@ -324,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task)
}
/*
- * Place a newly initialized task on the workqueue.
- */
-static inline void
-rpc_schedule_run(struct rpc_task *task)
-{
- rpc_set_active(task);
- rpc_make_runnable(task);
-}
-
-/*
* Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long
@@ -559,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
spin_unlock_bh(&queue->lock);
}
+static void __rpc_atrun(struct rpc_task *task)
+{
+ rpc_wake_up_task(task);
+}
+
/*
* Run a task at a later time
*/
-static void __rpc_atrun(struct rpc_task *);
-void
-rpc_delay(struct rpc_task *task, unsigned long delay)
+void rpc_delay(struct rpc_task *task, unsigned long delay)
{
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
-static void
-__rpc_atrun(struct rpc_task *task)
-{
- task->tk_status = 0;
- rpc_wake_up_task(task);
-}
-
/*
* Helper to call task->tk_ops->rpc_call_prepare
*/
@@ -933,72 +912,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
}
EXPORT_SYMBOL(rpc_run_task);
-/**
- * rpc_find_parent - find the parent of a child task.
- * @child: child task
- * @parent: parent task
- *
- * Checks that the parent task is still sleeping on the
- * queue 'childq'. If so returns a pointer to the parent.
- * Upon failure returns NULL.
- *
- * Caller must hold childq.lock
- */
-static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
-{
- struct rpc_task *task;
- struct list_head *le;
-
- task_for_each(task, le, &childq.tasks[0])
- if (task == parent)
- return parent;
-
- return NULL;
-}
-
-static void rpc_child_exit(struct rpc_task *child, void *calldata)
-{
- struct rpc_task *parent;
-
- spin_lock_bh(&childq.lock);
- if ((parent = rpc_find_parent(child, calldata)) != NULL) {
- parent->tk_status = child->tk_status;
- __rpc_wake_up_task(parent);
- }
- spin_unlock_bh(&childq.lock);
-}
-
-static const struct rpc_call_ops rpc_child_ops = {
- .rpc_call_done = rpc_child_exit,
-};
-
-/*
- * Note: rpc_new_task releases the client after a failure.
- */
-struct rpc_task *
-rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
-{
- struct rpc_task *task;
-
- task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
- if (!task)
- goto fail;
- return task;
-
-fail:
- parent->tk_status = -ENOMEM;
- return NULL;
-}
-
-void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
-{
- spin_lock_bh(&childq.lock);
- /* N.B. Is it possible for the child to have already finished? */
- __rpc_sleep_on(&childq, task, func, NULL);
- rpc_schedule_run(child);
- spin_unlock_bh(&childq.lock);
-}
-
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
@@ -1146,10 +1059,10 @@ rpc_destroy_mempool(void)
mempool_destroy(rpc_buffer_mempool);
if (rpc_task_mempool)
mempool_destroy(rpc_task_mempool);
- if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp))
- printk(KERN_INFO "rpc_task: not all structures were freed\n");
- if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp))
- printk(KERN_INFO "rpc_buffers: not all structures were freed\n");
+ if (rpc_task_slabp)
+ kmem_cache_destroy(rpc_task_slabp);
+ if (rpc_buffer_slabp)
+ kmem_cache_destroy(rpc_buffer_slabp);
}
int
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index eb330d4f66d6..6f17527b9e69 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
return -1;
if ((unsigned short)csum_fold(desc.csum))
return -1;
- if (unlikely(skb->ip_summed == CHECKSUM_HW))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 15c2db26767b..bd98124c3a64 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -114,13 +114,8 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
*/
struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
{
- unsigned int ops = clnt->cl_maxproc;
- size_t size = ops * sizeof(struct rpc_iostats);
struct rpc_iostats *new;
-
- new = kmalloc(size, GFP_KERNEL);
- if (new)
- memset(new, 0 , size);
+ new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
return new;
}
EXPORT_SYMBOL(rpc_alloc_iostats);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 769114f0f886..26c0531d7e25 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -6,7 +6,6 @@
* Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -37,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status);
EXPORT_SYMBOL(rpc_release_task);
/* RPC client functions */
-EXPORT_SYMBOL(rpc_create_client);
-EXPORT_SYMBOL(rpc_new_client);
EXPORT_SYMBOL(rpc_clone_client);
EXPORT_SYMBOL(rpc_bind_new_program);
EXPORT_SYMBOL(rpc_destroy_client);
@@ -58,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall);
EXPORT_SYMBOL(rpc_mkpipe);
/* Client transport */
-EXPORT_SYMBOL(xprt_create_proto);
EXPORT_SYMBOL(xprt_set_timeout);
/* Client credential cache */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b08419e1fc68..44b8d9d4c18a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,9 +32,8 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
int vers;
unsigned int xdrsize;
- if (!(serv = kmalloc(sizeof(*serv), GFP_KERNEL)))
+ if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
- memset(serv, 0, sizeof(*serv));
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
@@ -159,11 +158,10 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
struct svc_rqst *rqstp;
int error = -ENOMEM;
- rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
+ rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
goto out;
- memset(rqstp, 0, sizeof(*rqstp));
init_waitqueue_head(&rqstp->rq_wait);
if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
@@ -258,11 +256,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
struct kvec * argv = &rqstp->rq_arg.head[0];
struct kvec * resv = &rqstp->rq_res.head[0];
kxdrproc_t xdr;
- u32 *statp;
- u32 dir, prog, vers, proc,
- auth_stat, rpc_stat;
+ __be32 *statp;
+ u32 dir, prog, vers, proc;
+ __be32 auth_stat, rpc_stat;
int auth_res;
- u32 *accept_statp;
+ __be32 *accept_statp;
rpc_stat = rpc_success;
@@ -280,19 +278,22 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = 0;
rqstp->rq_res.buflen = PAGE_SIZE;
+ rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
+ /* Will be turned off only in gss privacy case: */
+ rqstp->rq_sendfile_ok = 1;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
- svc_putu32(resv, 0);
+ svc_putnl(resv, 0);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
- dir = ntohl(svc_getu32(argv));
- vers = ntohl(svc_getu32(argv));
+ dir = svc_getnl(argv);
+ vers = svc_getnl(argv);
/* First words of reply: */
- svc_putu32(resv, xdr_one); /* REPLY */
+ svc_putnl(resv, 1); /* REPLY */
if (dir != 0) /* direction != CALL */
goto err_bad_dir;
@@ -302,11 +303,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* Save position in case we later decide to reject: */
accept_statp = resv->iov_base + resv->iov_len;
- svc_putu32(resv, xdr_zero); /* ACCEPT */
+ svc_putnl(resv, 0); /* ACCEPT */
- rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
- rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */
- rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */
+ rqstp->rq_prog = prog = svc_getnl(argv); /* program number */
+ rqstp->rq_vers = vers = svc_getnl(argv); /* version number */
+ rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */
progp = serv->sv_program;
@@ -360,7 +361,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* Build the reply header. */
statp = resv->iov_base +resv->iov_len;
- svc_putu32(resv, rpc_success); /* RPC_SUCCESS */
+ svc_putnl(resv, RPC_SUCCESS);
/* Bump per-procedure stats counter */
procp->pc_count++;
@@ -438,10 +439,10 @@ err_bad_dir:
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resv, xdr_one); /* REJECT */
- svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */
- svc_putu32(resv, xdr_two); /* Only RPCv2 supported */
- svc_putu32(resv, xdr_two);
+ svc_putnl(resv, 1); /* REJECT */
+ svc_putnl(resv, 0); /* RPC_MISMATCH */
+ svc_putnl(resv, 2); /* Only RPCv2 supported */
+ svc_putnl(resv, 2);
goto sendit;
err_bad_auth:
@@ -449,15 +450,15 @@ err_bad_auth:
serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of accept status: */
xdr_ressize_check(rqstp, accept_statp);
- svc_putu32(resv, xdr_one); /* REJECT */
- svc_putu32(resv, xdr_one); /* AUTH_ERROR */
- svc_putu32(resv, auth_stat); /* status */
+ svc_putnl(resv, 1); /* REJECT */
+ svc_putnl(resv, 1); /* AUTH_ERROR */
+ svc_putnl(resv, ntohl(auth_stat)); /* status */
goto sendit;
err_bad_prog:
dprintk("svc: unknown program %d\n", prog);
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resv, rpc_prog_unavail);
+ svc_putnl(resv, RPC_PROG_UNAVAIL);
goto sendit;
err_bad_vers:
@@ -465,9 +466,9 @@ err_bad_vers:
printk("svc: unknown version (%d)\n", vers);
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resv, rpc_prog_mismatch);
- svc_putu32(resv, htonl(progp->pg_lovers));
- svc_putu32(resv, htonl(progp->pg_hivers));
+ svc_putnl(resv, RPC_PROG_MISMATCH);
+ svc_putnl(resv, progp->pg_lovers);
+ svc_putnl(resv, progp->pg_hivers);
goto sendit;
err_bad_proc:
@@ -475,7 +476,7 @@ err_bad_proc:
printk("svc: unknown procedure (%d)\n", proc);
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resv, rpc_proc_unavail);
+ svc_putnl(resv, RPC_PROC_UNAVAIL);
goto sendit;
err_garbage:
@@ -485,6 +486,6 @@ err_garbage:
rpc_stat = rpc_garbage_args;
err_bad:
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resv, rpc_stat);
+ svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5b28c6176806..8f2320aded5c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -35,14 +35,14 @@ static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = {
};
int
-svc_authenticate(struct svc_rqst *rqstp, u32 *authp)
+svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
{
rpc_authflavor_t flavor;
struct auth_ops *aops;
*authp = rpc_auth_ok;
- flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
+ flavor = svc_getnl(&rqstp->rq_arg.head[0]);
dprintk("svc: svc_authenticate (%d)\n", flavor);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 7e5707e2d6b6..1020d54b01d0 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -145,7 +145,7 @@ static void ip_map_request(struct cache_detail *cd,
{
char text_addr[20];
struct ip_map *im = container_of(h, struct ip_map, h);
- __u32 addr = im->m_addr.s_addr;
+ __be32 addr = im->m_addr.s_addr;
snprintf(text_addr, 20, "%u.%u.%u.%u",
ntohl(addr) >> 24 & 0xff,
@@ -249,10 +249,10 @@ static int ip_map_show(struct seq_file *m,
seq_printf(m, "%s %d.%d.%d.%d %s\n",
im->m_class,
- htonl(addr.s_addr) >> 24 & 0xff,
- htonl(addr.s_addr) >> 16 & 0xff,
- htonl(addr.s_addr) >> 8 & 0xff,
- htonl(addr.s_addr) >> 0 & 0xff,
+ ntohl(addr.s_addr) >> 24 & 0xff,
+ ntohl(addr.s_addr) >> 16 & 0xff,
+ ntohl(addr.s_addr) >> 8 & 0xff,
+ ntohl(addr.s_addr) >> 0 & 0xff,
dom
);
return 0;
@@ -410,7 +410,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
}
static int
-svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -427,7 +427,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
- if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+ if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return SVC_DENIED;
@@ -441,8 +441,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
return SVC_DROP; /* kmalloc failure - client must retry */
/* Put NULL verifier */
- svc_putu32(resv, RPC_AUTH_NULL);
- svc_putu32(resv, 0);
+ svc_putnl(resv, RPC_AUTH_NULL);
+ svc_putnl(resv, 0);
return SVC_OK;
}
@@ -472,7 +472,7 @@ struct auth_ops svcauth_null = {
static int
-svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -488,31 +488,31 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
- slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */
+ slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
- argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */
+ argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
- cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */
- cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */
- slen = ntohl(svc_getu32(argv)); /* gids length */
+ cred->cr_uid = svc_getnl(argv); /* uid */
+ cred->cr_gid = svc_getnl(argv); /* gid */
+ slen = svc_getnl(argv); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
return SVC_DROP;
for (i = 0; i < slen; i++)
- GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
+ GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
- if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+ if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Put NULL verifier */
- svc_putu32(resv, RPC_AUTH_NULL);
- svc_putu32(resv, 0);
+ svc_putnl(resv, RPC_AUTH_NULL);
+ svc_putnl(resv, 0);
return SVC_OK;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a27905a0ad27..5b0fe1b66a23 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
/* send head */
if (slen == xdr->head[0].iov_len)
flags = 0;
- len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+ len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
if (len != xdr->head[0].iov_len)
goto out;
slen -= xdr->head[0].iov_len;
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
while (pglen > 0) {
if (slen == size)
flags = 0;
- result = sock->ops->sendpage(sock, *ppage, base, size, flags);
+ result = kernel_sendpage(sock, *ppage, base, size, flags);
if (result > 0)
len += result;
if (result != size)
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
}
/* send tail */
if (xdr->tail[0].iov_len) {
- result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
+ result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
xdr->tail[0].iov_len, 0);
@@ -434,13 +434,10 @@ out:
static int
svc_recv_available(struct svc_sock *svsk)
{
- mm_segment_t oldfs;
struct socket *sock = svsk->sk_sock;
int avail, err;
- oldfs = get_fs(); set_fs(KERNEL_DS);
- err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
- set_fs(oldfs);
+ err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
return (err >= 0)? avail : err;
}
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
* at accept time. FIXME
*/
alen = sizeof(rqstp->rq_addr);
- sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+ kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk)
struct svc_serv *serv = svsk->sk_server;
struct socket *sock = svsk->sk_sock;
struct socket *newsock;
- const struct proto_ops *ops;
struct svc_sock *newsvsk;
int err, slen;
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk)
if (!sock)
return;
- err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
- if (err) {
+ clear_bit(SK_CONN, &svsk->sk_flags);
+ err = kernel_accept(sock, &newsock, O_NONBLOCK);
+ if (err < 0) {
if (err == -ENOMEM)
printk(KERN_WARNING "%s: no more sockets!\n",
serv->sv_name);
- return;
- }
-
- dprintk("svc: tcp_accept %p allocated\n", newsock);
- newsock->ops = ops = sock->ops;
-
- clear_bit(SK_CONN, &svsk->sk_flags);
- if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
- if (err != -EAGAIN && net_ratelimit())
+ else if (err != -EAGAIN && net_ratelimit())
printk(KERN_WARNING "%s: accept failed (err %d)!\n",
serv->sv_name, -err);
- goto failed; /* aborted connection or whatever */
+ return;
}
+
set_bit(SK_CONN, &svsk->sk_flags);
svc_sock_enqueue(svsk);
slen = sizeof(sin);
- err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
+ err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
if (err < 0) {
if (net_ratelimit())
printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -1040,7 +1030,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
{
struct xdr_buf *xbufp = &rqstp->rq_res;
int sent;
- u32 reclen;
+ __be32 reclen;
/* Set up the first element of the reply kvec.
* Any other kvecs that may be in use have been taken
@@ -1322,11 +1312,10 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
struct sock *inet;
dprintk("svc: svc_setup_socket %p\n", sock);
- if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
+ if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
*errp = -ENOMEM;
return NULL;
}
- memset(svsk, 0, sizeof(*svsk));
inet = sock->sk;
@@ -1404,17 +1393,15 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
return error;
- if (sin != NULL) {
- if (type == SOCK_STREAM)
- sock->sk->sk_reuse = 1; /* allow address reuse */
- error = sock->ops->bind(sock, (struct sockaddr *) sin,
- sizeof(*sin));
- if (error < 0)
- goto bummer;
- }
+ if (type == SOCK_STREAM)
+ sock->sk->sk_reuse = 1; /* allow address reuse */
+ error = kernel_bind(sock, (struct sockaddr *) sin,
+ sizeof(*sin));
+ if (error < 0)
+ goto bummer;
if (protocol == IPPROTO_TCP) {
- if ((error = sock->ops->listen(sock, 64)) < 0)
+ if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 1065904841fd..d89b048ad6bb 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -7,7 +7,6 @@
* impossible at the moment.
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/linkage.h>
#include <linux/ctype.h>
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index bcbdf6430d5c..8142fdb8a930 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -19,8 +19,6 @@
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/timer.h>
#define RPC_RTO_MAX (60*HZ)
#define RPC_RTO_INIT (HZ/5)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index ca4bfa57e116..9022eb8b37ed 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -18,8 +18,8 @@
/*
* XDR functions for basic NFS types
*/
-u32 *
-xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
+__be32 *
+xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
{
unsigned int quadlen = XDR_QUADLEN(obj->len);
@@ -29,8 +29,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
return p + XDR_QUADLEN(obj->len);
}
-u32 *
-xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
+__be32 *
+xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
{
unsigned int len;
@@ -55,7 +55,7 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
* Returns the updated current XDR buffer position
*
*/
-u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes)
+__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes)
{
if (likely(nbytes != 0)) {
unsigned int quadlen = XDR_QUADLEN(nbytes);
@@ -79,21 +79,21 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed);
*
* Returns the updated current XDR buffer position
*/
-u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes)
+__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
{
*p++ = htonl(nbytes);
return xdr_encode_opaque_fixed(p, ptr, nbytes);
}
EXPORT_SYMBOL(xdr_encode_opaque);
-u32 *
-xdr_encode_string(u32 *p, const char *string)
+__be32 *
+xdr_encode_string(__be32 *p, const char *string)
{
return xdr_encode_array(p, string, strlen(string));
}
-u32 *
-xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
+__be32 *
+xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
{
unsigned int len;
@@ -191,7 +191,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
do {
/* Are any pointers crossing a page boundary? */
if (pgto_base == 0) {
- flush_dcache_page(*pgto);
pgto_base = PAGE_CACHE_SIZE;
pgto--;
}
@@ -211,11 +210,11 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
vto = kmap_atomic(*pgto, KM_USER0);
vfrom = kmap_atomic(*pgfrom, KM_USER1);
memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
+ flush_dcache_page(*pgto);
kunmap_atomic(vfrom, KM_USER1);
kunmap_atomic(vto, KM_USER0);
} while ((len -= copy) != 0);
- flush_dcache_page(*pgto);
}
/*
@@ -433,7 +432,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
* of the buffer length, and takes care of adjusting the kvec
* length for us.
*/
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
{
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -441,8 +440,8 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
- xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
- xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len);
+ xdr->p = (__be32 *)((char *)iov->iov_base + iov->iov_len);
+ xdr->end = (__be32 *)((char *)iov->iov_base + scratch_len);
BUG_ON(iov->iov_len > scratch_len);
if (p != xdr->p && p != NULL) {
@@ -466,10 +465,10 @@ EXPORT_SYMBOL(xdr_init_encode);
* bytes of data. If so, update the total xdr_buf length, and
* adjust the length of the current kvec.
*/
-uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
+__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
{
- uint32_t *p = xdr->p;
- uint32_t *q;
+ __be32 *p = xdr->p;
+ __be32 *q;
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
@@ -525,7 +524,7 @@ EXPORT_SYMBOL(xdr_write_pages);
* @buf: pointer to XDR buffer from which to decode data
* @p: current pointer inside XDR buffer
*/
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
{
struct kvec *iov = buf->head;
unsigned int len = iov->iov_len;
@@ -535,7 +534,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
xdr->buf = buf;
xdr->iov = iov;
xdr->p = p;
- xdr->end = (uint32_t *)((char *)iov->iov_base + len);
+ xdr->end = (__be32 *)((char *)iov->iov_base + len);
}
EXPORT_SYMBOL(xdr_init_decode);
@@ -549,10 +548,10 @@ EXPORT_SYMBOL(xdr_init_decode);
* If so return the current pointer, then update the current
* pointer position.
*/
-uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
+__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
- uint32_t *p = xdr->p;
- uint32_t *q = p + XDR_QUADLEN(nbytes);
+ __be32 *p = xdr->p;
+ __be32 *q = p + XDR_QUADLEN(nbytes);
if (unlikely(q > xdr->end || q < p))
return NULL;
@@ -568,8 +567,7 @@ EXPORT_SYMBOL(xdr_inline_decode);
*
* Moves data beyond the current pointer position from the XDR head[] buffer
* into the page list. Any data that lies beyond current position + "len"
- * bytes is moved into the XDR tail[]. The current pointer is then
- * repositioned at the beginning of the XDR tail.
+ * bytes is moved into the XDR tail[].
*/
void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
{
@@ -601,11 +599,36 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
- xdr->p = (uint32_t *)((char *)iov->iov_base + padding);
- xdr->end = (uint32_t *)((char *)iov->iov_base + end);
+ xdr->p = (__be32 *)((char *)iov->iov_base + padding);
+ xdr->end = (__be32 *)((char *)iov->iov_base + end);
}
EXPORT_SYMBOL(xdr_read_pages);
+/**
+ * xdr_enter_page - decode data from the XDR page
+ * @xdr: pointer to xdr_stream struct
+ * @len: number of bytes of page data
+ *
+ * Moves data beyond the current pointer position from the XDR head[] buffer
+ * into the page list. Any data that lies beyond current position + "len"
+ * bytes is moved into the XDR tail[]. The current pointer is then
+ * repositioned at the beginning of the first XDR page.
+ */
+void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
+{
+ char * kaddr = page_address(xdr->buf->pages[0]);
+ xdr_read_pages(xdr, len);
+ /*
+ * Position current pointer at beginning of tail, and
+ * set remaining message length.
+ */
+ if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
+ len = PAGE_CACHE_SIZE - xdr->buf->page_base;
+ xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
+ xdr->end = (__be32 *)((char *)xdr->p + len);
+}
+EXPORT_SYMBOL(xdr_enter_page);
+
static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
void
@@ -720,7 +743,7 @@ out:
int
xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
{
- u32 raw;
+ __be32 raw;
int status;
status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
@@ -733,7 +756,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
int
xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
{
- u32 raw = htonl(obj);
+ __be32 raw = htonl(obj);
return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4dd5b3cfe754..80857470dc11 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -41,7 +41,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
-#include <linux/random.h>
+#include <linux/net.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/metrics.h>
@@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task)
dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
xprt, (xprt_connected(xprt) ? "is" : "is not"));
- if (!xprt->addr.sin_port) {
+ if (!xprt_bound(xprt)) {
task->tk_status = -EIO;
return;
}
@@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task)
task->tk_pid, -task->tk_status, task->tk_client->cl_server);
xprt_release_write(xprt, task);
task->tk_status = -EIO;
- return;
- }
-
- /* if soft mounted, just cause this RPC to fail */
- if (RPC_IS_SOFT(task)) {
- xprt_release_write(xprt, task);
- task->tk_status = -EIO;
}
}
@@ -601,7 +594,7 @@ static void xprt_connect_status(struct rpc_task *task)
* @xid: RPC XID of incoming reply
*
*/
-struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
+struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
struct list_head *pos;
@@ -707,12 +700,9 @@ out_unlock:
return err;
}
-void
-xprt_abort_transmit(struct rpc_task *task)
+void xprt_end_transmit(struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_xprt;
-
- xprt_release_write(xprt, task);
+ xprt_release_write(task->tk_xprt, task);
}
/**
@@ -761,8 +751,6 @@ void xprt_transmit(struct rpc_task *task)
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
-
- xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return;
}
@@ -772,18 +760,8 @@ void xprt_transmit(struct rpc_task *task)
* schedq, and being picked up by a parallel run of rpciod().
*/
task->tk_status = status;
-
- switch (status) {
- case -ECONNREFUSED:
+ if (status == -ECONNREFUSED)
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
- case -EAGAIN:
- case -ENOTCONN:
- return;
- default:
- break;
- }
- xprt_release_write(xprt, task);
- return;
}
static inline void do_xprt_reserve(struct rpc_task *task)
@@ -823,14 +801,14 @@ void xprt_reserve(struct rpc_task *task)
spin_unlock(&xprt->reserve_lock);
}
-static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
+static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
{
return xprt->xid++;
}
static inline void xprt_init_xid(struct rpc_xprt *xprt)
{
- get_random_bytes(&xprt->xid, sizeof(xprt->xid));
+ xprt->xid = net_random();
}
static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
@@ -844,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_bufsize = 0;
req->rq_xid = xprt_alloc_xid(xprt);
req->rq_release_snd_buf = NULL;
+ xprt_reset_majortimeo(req);
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
req, ntohl(req->rq_xid));
}
@@ -902,17 +881,32 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
to->to_exponential = 0;
}
-static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
+/**
+ * xprt_create_transport - create an RPC transport
+ * @proto: requested transport protocol
+ * @ap: remote peer address
+ * @size: length of address
+ * @to: timeout parameters
+ *
+ */
+struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
{
int result;
struct rpc_xprt *xprt;
struct rpc_rqst *req;
- if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
+ if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
+ dprintk("RPC: xprt_create_transport: no memory\n");
return ERR_PTR(-ENOMEM);
- memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
-
- xprt->addr = *ap;
+ }
+ if (size <= sizeof(xprt->addr)) {
+ memcpy(&xprt->addr, ap, size);
+ xprt->addrlen = size;
+ } else {
+ kfree(xprt);
+ dprintk("RPC: xprt_create_transport: address too large\n");
+ return ERR_PTR(-EBADF);
+ }
switch (proto) {
case IPPROTO_UDP:
@@ -924,14 +918,15 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
default:
printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
proto);
- result = -EIO;
- break;
+ return ERR_PTR(-EIO);
}
if (result) {
kfree(xprt);
+ dprintk("RPC: xprt_create_transport: failed, %d\n", result);
return ERR_PTR(result);
}
+ kref_init(&xprt->kref);
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
@@ -944,6 +939,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
+ rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
rpc_init_wait_queue(&xprt->sending, "xprt_sending");
rpc_init_wait_queue(&xprt->resend, "xprt_resend");
@@ -957,41 +953,43 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
dprintk("RPC: created transport %p with %u slots\n", xprt,
xprt->max_reqs);
-
- return xprt;
-}
-/**
- * xprt_create_proto - create an RPC client transport
- * @proto: requested transport protocol
- * @sap: remote peer's address
- * @to: timeout parameters for new transport
- *
- */
-struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
-{
- struct rpc_xprt *xprt;
-
- xprt = xprt_setup(proto, sap, to);
- if (IS_ERR(xprt))
- dprintk("RPC: xprt_create_proto failed\n");
- else
- dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
return xprt;
}
/**
* xprt_destroy - destroy an RPC transport, killing off all requests.
- * @xprt: transport to destroy
+ * @kref: kref for the transport to destroy
*
*/
-int xprt_destroy(struct rpc_xprt *xprt)
+static void xprt_destroy(struct kref *kref)
{
+ struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
+
dprintk("RPC: destroying transport %p\n", xprt);
xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
xprt->ops->destroy(xprt);
kfree(xprt);
+}
- return 0;
+/**
+ * xprt_put - release a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+void xprt_put(struct rpc_xprt *xprt)
+{
+ kref_put(&xprt->kref, xprt_destroy);
+}
+
+/**
+ * xprt_get - return a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
+{
+ kref_get(&xprt->kref);
+ return xprt;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4b4e7dfdff14..28100e019225 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
}
#endif
+static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+ char *buf;
+
+ buf = kzalloc(20, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 20, "%u.%u.%u.%u",
+ NIPQUAD(addr->sin_addr.s_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%u",
+ ntohs(addr->sin_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+ if (xprt->prot == IPPROTO_UDP)
+ xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
+ else
+ xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+
+ buf = kzalloc(48, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port),
+ xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+}
+
+static void xs_free_peer_addresses(struct rpc_xprt *xprt)
+{
+ kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+}
+
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
@@ -174,7 +215,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
int err, ret = 0;
- ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
if (unlikely(!sock))
return -ENOTCONN;
@@ -207,7 +247,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
base &= ~PAGE_CACHE_MASK;
}
- sendpage = sock->ops->sendpage ? : sock_no_sendpage;
do {
int flags = XS_SENDMSG_FLAGS;
@@ -220,10 +259,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
if (pglen != len || xdr->tail[0].iov_len != 0)
flags |= MSG_MORE;
- /* Hmm... We might be dealing with highmem pages */
- if (PageHighMem(*ppage))
- sendpage = sock_no_sendpage;
- err = sendpage(sock, *ppage, base, len, flags);
+ err = kernel_sendpage(sock, *ppage, base, len, flags);
if (ret == 0)
ret = err;
else if (err > 0)
@@ -300,7 +336,7 @@ static int xs_udp_send_request(struct rpc_task *task)
req->rq_xtime = jiffies;
status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
- sizeof(xprt->addr), xdr, req->rq_bytes_sent);
+ xprt->addrlen, xdr, req->rq_bytes_sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -414,6 +450,33 @@ static int xs_tcp_send_request(struct rpc_task *task)
}
/**
+ * xs_tcp_release_xprt - clean up after a tcp transmission
+ * @xprt: transport
+ * @task: rpc task
+ *
+ * This cleans up if an error causes us to abort the transmission of a request.
+ * In this case, the socket may need to be reset in order to avoid confusing
+ * the server.
+ */
+static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ struct rpc_rqst *req;
+
+ if (task != xprt->snd_task)
+ return;
+ if (task == NULL)
+ goto out_release;
+ req = task->tk_rqstp;
+ if (req->rq_bytes_sent == 0)
+ goto out_release;
+ if (req->rq_bytes_sent == req->rq_snd_buf.len)
+ goto out_release;
+ set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state);
+out_release:
+ xprt_release_xprt(xprt, task);
+}
+
+/**
* xs_close - close a socket
* @xprt: transport
*
@@ -463,6 +526,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
xprt_disconnect(xprt);
xs_close(xprt);
+ xs_free_peer_addresses(xprt);
kfree(xprt->slot);
}
@@ -484,7 +548,8 @@ static void xs_udp_data_ready(struct sock *sk, int len)
struct rpc_rqst *rovr;
struct sk_buff *skb;
int err, repsize, copied;
- u32 _xid, *xp;
+ u32 _xid;
+ __be32 *xp;
read_lock(&sk->sk_callback_lock);
dprintk("RPC: xs_udp_data_ready...\n");
@@ -930,6 +995,26 @@ static void xs_udp_timer(struct rpc_task *task)
xprt_adjust_cwnd(task, -ETIMEDOUT);
}
+static unsigned short xs_get_random_port(void)
+{
+ unsigned short range = xprt_max_resvport - xprt_min_resvport;
+ unsigned short rand = (unsigned short) net_random() % range;
+ return rand + xprt_min_resvport;
+}
+
+/**
+ * xs_print_peer_address - format an IPv4 address for printing
+ * @xprt: generic transport
+ * @format: flags field indicating which parts of the address to render
+ */
+static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
+{
+ if (xprt->address_strings[format] != NULL)
+ return xprt->address_strings[format];
+ else
+ return "unprintable";
+}
+
/**
* xs_set_port - reset the port number in the remote endpoint address
* @xprt: generic transport
@@ -938,8 +1023,11 @@ static void xs_udp_timer(struct rpc_task *task)
*/
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
+ struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+
dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
- xprt->addr.sin_port = htons(port);
+
+ sap->sin_port = htons(port);
}
static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
@@ -952,7 +1040,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
do {
myaddr.sin_port = htons(port);
- err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+ err = kernel_bind(sock, (struct sockaddr *) &myaddr,
sizeof(myaddr));
if (err == 0) {
xprt->port = port;
@@ -982,11 +1070,9 @@ static void xs_udp_connect_worker(void *args)
struct socket *sock = xprt->sock;
int err, status = -EIO;
- if (xprt->shutdown || xprt->addr.sin_port == 0)
+ if (xprt->shutdown || !xprt_bound(xprt))
goto out;
- dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt);
-
/* Start by resetting any existing state */
xs_close(xprt);
@@ -1000,6 +1086,9 @@ static void xs_udp_connect_worker(void *args)
goto out;
}
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
if (!xprt->inet) {
struct sock *sk = sock->sk;
@@ -1047,7 +1136,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
*/
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
- result = sock->ops->connect(sock, &any, sizeof(any), 0);
+ result = kernel_connect(sock, &any, sizeof(any), 0);
if (result)
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
@@ -1065,11 +1154,9 @@ static void xs_tcp_connect_worker(void *args)
struct socket *sock = xprt->sock;
int err, status = -EIO;
- if (xprt->shutdown || xprt->addr.sin_port == 0)
+ if (xprt->shutdown || !xprt_bound(xprt))
goto out;
- dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt);
-
if (!xprt->sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
@@ -1085,6 +1172,9 @@ static void xs_tcp_connect_worker(void *args)
/* "close" the socket, preserving the local port */
xs_tcp_reuse_connection(xprt);
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
if (!xprt->inet) {
struct sock *sk = sock->sk;
@@ -1117,8 +1207,8 @@ static void xs_tcp_connect_worker(void *args)
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
- status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
- sizeof(xprt->addr), O_NONBLOCK);
+ status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
+ xprt->addrlen, O_NONBLOCK);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
if (status < 0) {
@@ -1226,8 +1316,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
+ .print_addr = xs_print_peer_address,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
+ .rpcbind = rpc_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1242,8 +1334,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
};
static struct rpc_xprt_ops xs_tcp_ops = {
+ .print_addr = xs_print_peer_address,
.reserve_xprt = xprt_reserve_xprt,
- .release_xprt = xprt_release_xprt,
+ .release_xprt = xs_tcp_release_xprt,
+ .rpcbind = rpc_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1264,20 +1358,20 @@ static struct rpc_xprt_ops xs_tcp_ops = {
int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
{
size_t slot_table_size;
-
- dprintk("RPC: setting up udp-ipv4 transport...\n");
+ struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
xprt->max_reqs = xprt_udp_slot_table_entries;
slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
- xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
+ xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
if (xprt->slot == NULL)
return -ENOMEM;
- memset(xprt->slot, 0, slot_table_size);
+
+ if (ntohs(addr->sin_port != 0))
+ xprt_set_bound(xprt);
+ xprt->port = xs_get_random_port();
xprt->prot = IPPROTO_UDP;
- xprt->port = xprt_max_resvport;
xprt->tsh_size = 0;
- xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
@@ -1294,6 +1388,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
else
xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
+ xs_format_peer_addresses(xprt);
+ dprintk("RPC: set up transport to address %s\n",
+ xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
return 0;
}
@@ -1306,20 +1404,20 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
{
size_t slot_table_size;
-
- dprintk("RPC: setting up tcp-ipv4 transport...\n");
+ struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
xprt->max_reqs = xprt_tcp_slot_table_entries;
slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
- xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
+ xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
if (xprt->slot == NULL)
return -ENOMEM;
- memset(xprt->slot, 0, slot_table_size);
+
+ if (ntohs(addr->sin_port) != 0)
+ xprt_set_bound(xprt);
+ xprt->port = xs_get_random_port();
xprt->prot = IPPROTO_TCP;
- xprt->port = xprt_max_resvport;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
- xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
@@ -1335,5 +1433,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
else
xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
+ xs_format_peer_addresses(xprt);
+ dprintk("RPC: set up transport to address %s\n",
+ xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
return 0;
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 58a1b6b42ddd..cd4eafbab1b8 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -12,7 +12,6 @@
*
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 2c4ecbe50082..1bb75703f384 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -49,13 +49,19 @@
#include "name_table.h"
#include "bcast.h"
-
#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
#define BCLINK_LOG_BUF_SIZE 0
+/*
+ * Loss rate for incoming broadcast frames; used to test retransmission code.
+ * Set to N to cause every N'th frame to be discarded; 0 => don't discard any.
+ */
+
+#define TIPC_BCAST_LOSS_RATE 0
+
/**
* struct bcbearer_pair - a pair of bearers used by broadcast link
* @primary: pointer to primary bearer
@@ -75,7 +81,14 @@ struct bcbearer_pair {
* @bearer: (non-standard) broadcast bearer structure
* @media: (non-standard) broadcast media structure
* @bpairs: array of bearer pairs
- * @bpairs_temp: array of bearer pairs used during creation of "bpairs"
+ * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort()
+ * @remains: temporary node map used by tipc_bcbearer_send()
+ * @remains_new: temporary node map used tipc_bcbearer_send()
+ *
+ * Note: The fields labelled "temporary" are incorporated into the bearer
+ * to avoid consuming potentially limited stack space through the use of
+ * large local variables within multicast routines. Concurrent access is
+ * prevented through use of the spinlock "bc_lock".
*/
struct bcbearer {
@@ -83,6 +96,8 @@ struct bcbearer {
struct media media;
struct bcbearer_pair bpairs[MAX_BEARERS];
struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
+ struct node_map remains;
+ struct node_map remains_new;
};
/**
@@ -102,7 +117,7 @@ struct bclink {
static struct bcbearer *bcbearer = NULL;
static struct bclink *bclink = NULL;
static struct link *bcl = NULL;
-static spinlock_t bc_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(bc_lock);
char tipc_bclink_name[] = "multicast-link";
@@ -165,21 +180,18 @@ static int bclink_ack_allowed(u32 n)
* @after: sequence number of last packet to *not* retransmit
* @to: sequence number of last packet to retransmit
*
- * Called with 'node' locked, bc_lock unlocked
+ * Called with bc_lock locked
*/
static void bclink_retransmit_pkt(u32 after, u32 to)
{
struct sk_buff *buf;
- spin_lock_bh(&bc_lock);
buf = bcl->first_out;
while (buf && less_eq(buf_seqno(buf), after)) {
buf = buf->next;
}
- if (buf != NULL)
- tipc_link_retransmit(bcl, buf, mod(to - after));
- spin_unlock_bh(&bc_lock);
+ tipc_link_retransmit(bcl, buf, mod(to - after));
}
/**
@@ -346,8 +358,10 @@ static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 g
for (; buf; buf = buf->next) {
u32 seqno = buf_seqno(buf);
- if (mod(seqno - prev) != 1)
+ if (mod(seqno - prev) != 1) {
buf = NULL;
+ break;
+ }
if (seqno == gap_after)
break;
prev = seqno;
@@ -399,7 +413,10 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
*/
void tipc_bclink_recv_pkt(struct sk_buff *buf)
-{
+{
+#if (TIPC_BCAST_LOSS_RATE)
+ static int rx_count = 0;
+#endif
struct tipc_msg *msg = buf_msg(buf);
struct node* node = tipc_node_find(msg_prevnode(msg));
u32 next_in;
@@ -420,9 +437,13 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
tipc_node_lock(node);
tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
tipc_node_unlock(node);
+ spin_lock_bh(&bc_lock);
bcl->stats.recv_nacks++;
+ bcl->owner->next = node; /* remember requestor */
bclink_retransmit_pkt(msg_bcgap_after(msg),
msg_bcgap_to(msg));
+ bcl->owner->next = NULL;
+ spin_unlock_bh(&bc_lock);
} else {
tipc_bclink_peek_nack(msg_destnode(msg),
msg_bcast_tag(msg),
@@ -433,6 +454,14 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
return;
}
+#if (TIPC_BCAST_LOSS_RATE)
+ if (++rx_count == TIPC_BCAST_LOSS_RATE) {
+ rx_count = 0;
+ buf_discard(buf);
+ return;
+ }
+#endif
+
tipc_node_lock(node);
receive:
deferred = node->bclink.deferred_head;
@@ -531,12 +560,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
{
static int send_count = 0;
- struct node_map *remains;
- struct node_map *remains_new;
- struct node_map *remains_tmp;
int bp_index;
int swap_time;
- int err;
/* Prepare buffer for broadcasting (if first time trying to send it) */
@@ -557,9 +582,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
/* Send buffer over bearers until all targets reached */
- remains = kmalloc(sizeof(struct node_map), GFP_ATOMIC);
- remains_new = kmalloc(sizeof(struct node_map), GFP_ATOMIC);
- *remains = tipc_cltr_bcast_nodes;
+ bcbearer->remains = tipc_cltr_bcast_nodes;
for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
struct bearer *p = bcbearer->bpairs[bp_index].primary;
@@ -568,8 +591,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
if (!p)
break; /* no more bearers to try */
- tipc_nmap_diff(remains, &p->nodes, remains_new);
- if (remains_new->count == remains->count)
+ tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new);
+ if (bcbearer->remains_new.count == bcbearer->remains.count)
continue; /* bearer pair doesn't add anything */
if (!p->publ.blocked &&
@@ -587,27 +610,17 @@ swap:
bcbearer->bpairs[bp_index].primary = s;
bcbearer->bpairs[bp_index].secondary = p;
update:
- if (remains_new->count == 0) {
- err = TIPC_OK;
- goto out;
- }
+ if (bcbearer->remains_new.count == 0)
+ return TIPC_OK;
- /* swap map */
- remains_tmp = remains;
- remains = remains_new;
- remains_new = remains_tmp;
+ bcbearer->remains = bcbearer->remains_new;
}
/* Unable to reach all targets */
bcbearer->bearer.publ.blocked = 1;
bcl->stats.bearer_congs++;
- err = ~TIPC_OK;
-
- out:
- kfree(remains_new);
- kfree(remains);
- return err;
+ return ~TIPC_OK;
}
/**
@@ -765,7 +778,7 @@ int tipc_bclink_init(void)
bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC);
if (!bcbearer || !bclink) {
nomem:
- warn("Memory squeeze; Failed to create multicast link\n");
+ warn("Multicast link creation failed, no memory\n");
kfree(bcbearer);
bcbearer = NULL;
kfree(bclink);
@@ -783,7 +796,7 @@ int tipc_bclink_init(void)
memset(bclink, 0, sizeof(struct bclink));
INIT_LIST_HEAD(&bcl->waiting_ports);
bcl->next_out_no = 1;
- bclink->node.lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&bclink->node.lock);
bcl->owner = &bclink->node;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 0e3be2ab3307..b243d9d495f0 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -180,7 +180,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
if (!item->next) {
item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
if (!item->next) {
- warn("Memory squeeze: multicast destination port list is incomplete\n");
+ warn("Incomplete multicast delivery, no memory\n");
return;
}
item->next->next = NULL;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e213a8e54855..75a5968c2139 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -112,39 +112,42 @@ int tipc_register_media(u32 media_type,
goto exit;
if (!media_name_valid(name)) {
- warn("Media registration error: illegal name <%s>\n", name);
+ warn("Media <%s> rejected, illegal name\n", name);
goto exit;
}
if (!bcast_addr) {
- warn("Media registration error: no broadcast address supplied\n");
+ warn("Media <%s> rejected, no broadcast address\n", name);
goto exit;
}
if ((bearer_priority < TIPC_MIN_LINK_PRI) &&
(bearer_priority > TIPC_MAX_LINK_PRI)) {
- warn("Media registration error: priority %u\n", bearer_priority);
+ warn("Media <%s> rejected, illegal priority (%u)\n", name,
+ bearer_priority);
goto exit;
}
if ((link_tolerance < TIPC_MIN_LINK_TOL) ||
(link_tolerance > TIPC_MAX_LINK_TOL)) {
- warn("Media registration error: tolerance %u\n", link_tolerance);
+ warn("Media <%s> rejected, illegal tolerance (%u)\n", name,
+ link_tolerance);
goto exit;
}
media_id = media_count++;
if (media_id >= MAX_MEDIA) {
- warn("Attempt to register more than %u media\n", MAX_MEDIA);
+ warn("Media <%s> rejected, media limit reached (%u)\n", name,
+ MAX_MEDIA);
media_count--;
goto exit;
}
for (i = 0; i < media_id; i++) {
if (media_list[i].type_id == media_type) {
- warn("Attempt to register second media with type %u\n",
+ warn("Media <%s> rejected, duplicate type (%u)\n", name,
media_type);
media_count--;
goto exit;
}
if (!strcmp(name, media_list[i].name)) {
- warn("Attempt to re-register media name <%s>\n", name);
+ warn("Media <%s> rejected, duplicate name\n", name);
media_count--;
goto exit;
}
@@ -283,6 +286,9 @@ static struct bearer *bearer_find(const char *name)
struct bearer *b_ptr;
u32 i;
+ if (tipc_mode != TIPC_NET_MODE)
+ return NULL;
+
for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
return b_ptr;
@@ -475,26 +481,33 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
u32 i;
int res = -EINVAL;
- if (tipc_mode != TIPC_NET_MODE)
+ if (tipc_mode != TIPC_NET_MODE) {
+ warn("Bearer <%s> rejected, not supported in standalone mode\n",
+ name);
return -ENOPROTOOPT;
-
- if (!bearer_name_validate(name, &b_name) ||
- !tipc_addr_domain_valid(bcast_scope) ||
- !in_scope(bcast_scope, tipc_own_addr))
+ }
+ if (!bearer_name_validate(name, &b_name)) {
+ warn("Bearer <%s> rejected, illegal name\n", name);
return -EINVAL;
-
+ }
+ if (!tipc_addr_domain_valid(bcast_scope) ||
+ !in_scope(bcast_scope, tipc_own_addr)) {
+ warn("Bearer <%s> rejected, illegal broadcast scope\n", name);
+ return -EINVAL;
+ }
if ((priority < TIPC_MIN_LINK_PRI ||
priority > TIPC_MAX_LINK_PRI) &&
- (priority != TIPC_MEDIA_LINK_PRI))
+ (priority != TIPC_MEDIA_LINK_PRI)) {
+ warn("Bearer <%s> rejected, illegal priority\n", name);
return -EINVAL;
+ }
write_lock_bh(&tipc_net_lock);
- if (!tipc_bearers)
- goto failed;
m_ptr = media_find(b_name.media_name);
if (!m_ptr) {
- warn("No media <%s>\n", b_name.media_name);
+ warn("Bearer <%s> rejected, media <%s> not registered\n", name,
+ b_name.media_name);
goto failed;
}
@@ -510,23 +523,24 @@ restart:
continue;
}
if (!strcmp(name, tipc_bearers[i].publ.name)) {
- warn("Bearer <%s> already enabled\n", name);
+ warn("Bearer <%s> rejected, already enabled\n", name);
goto failed;
}
if ((tipc_bearers[i].priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
- warn("Third bearer <%s> with priority %u, unable to lower to %u\n",
- name, priority + 1, priority);
+ warn("Bearer <%s> rejected, duplicate priority\n",
+ name);
goto failed;
}
- warn("Third bearer <%s> with priority %u, lowering to %u\n",
+ warn("Bearer <%s> priority adjustment required %u->%u\n",
name, priority + 1, priority);
goto restart;
}
}
if (bearer_id >= MAX_BEARERS) {
- warn("Attempt to enable more than %d bearers\n", MAX_BEARERS);
+ warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
+ name, MAX_BEARERS);
goto failed;
}
@@ -536,7 +550,7 @@ restart:
strcpy(b_ptr->publ.name, name);
res = m_ptr->enable_bearer(&b_ptr->publ);
if (res) {
- warn("Failed to enable bearer <%s>\n", name);
+ warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
goto failed;
}
@@ -552,7 +566,7 @@ restart:
b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr,
bcast_scope, 2);
}
- b_ptr->publ.lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&b_ptr->publ.lock);
write_unlock_bh(&tipc_net_lock);
info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name, addr_string_fill(addr_string, bcast_scope), priority);
@@ -573,9 +587,6 @@ int tipc_block_bearer(const char *name)
struct link *l_ptr;
struct link *temp_l_ptr;
- if (tipc_mode != TIPC_NET_MODE)
- return -ENOPROTOOPT;
-
read_lock_bh(&tipc_net_lock);
b_ptr = bearer_find(name);
if (!b_ptr) {
@@ -584,6 +595,7 @@ int tipc_block_bearer(const char *name)
return -EINVAL;
}
+ info("Blocking bearer <%s>\n", name);
spin_lock_bh(&b_ptr->publ.lock);
b_ptr->publ.blocked = 1;
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
@@ -595,7 +607,6 @@ int tipc_block_bearer(const char *name)
}
spin_unlock_bh(&b_ptr->publ.lock);
read_unlock_bh(&tipc_net_lock);
- info("Blocked bearer <%s>\n", name);
return TIPC_OK;
}
@@ -611,15 +622,13 @@ static int bearer_disable(const char *name)
struct link *l_ptr;
struct link *temp_l_ptr;
- if (tipc_mode != TIPC_NET_MODE)
- return -ENOPROTOOPT;
-
b_ptr = bearer_find(name);
if (!b_ptr) {
warn("Attempt to disable unknown bearer <%s>\n", name);
return -EINVAL;
}
+ info("Disabling bearer <%s>\n", name);
tipc_disc_stop_link_req(b_ptr->link_req);
spin_lock_bh(&b_ptr->publ.lock);
b_ptr->link_req = NULL;
@@ -635,7 +644,6 @@ static int bearer_disable(const char *name)
tipc_link_delete(l_ptr);
}
spin_unlock_bh(&b_ptr->publ.lock);
- info("Disabled bearer <%s>\n", name);
memset(b_ptr, 0, sizeof(struct bearer));
return TIPC_OK;
}
@@ -657,11 +665,9 @@ int tipc_bearer_init(void)
int res;
write_lock_bh(&tipc_net_lock);
- tipc_bearers = kmalloc(MAX_BEARERS * sizeof(struct bearer), GFP_ATOMIC);
- media_list = kmalloc(MAX_MEDIA * sizeof(struct media), GFP_ATOMIC);
+ tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
+ media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
if (tipc_bearers && media_list) {
- memset(tipc_bearers, 0, MAX_BEARERS * sizeof(struct bearer));
- memset(media_list, 0, MAX_MEDIA * sizeof(struct media));
res = TIPC_OK;
} else {
kfree(tipc_bearers);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 1aed81584e96..b46b5188a9fd 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -57,43 +57,43 @@ struct cluster *tipc_cltr_create(u32 addr)
struct _zone *z_ptr;
struct cluster *c_ptr;
int max_nodes;
- int alloc;
- c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC);
- if (c_ptr == NULL)
+ c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC);
+ if (c_ptr == NULL) {
+ warn("Cluster creation failure, no memory\n");
return NULL;
- memset(c_ptr, 0, sizeof(*c_ptr));
+ }
c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
if (in_own_cluster(addr))
max_nodes = LOWEST_SLAVE + tipc_max_slaves;
else
max_nodes = tipc_max_nodes + 1;
- alloc = sizeof(void *) * (max_nodes + 1);
- c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC);
+
+ c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
if (c_ptr->nodes == NULL) {
+ warn("Cluster creation failure, no memory for node area\n");
kfree(c_ptr);
return NULL;
}
- memset(c_ptr->nodes, 0, alloc);
+
if (in_own_cluster(addr))
tipc_local_nodes = c_ptr->nodes;
c_ptr->highest_slave = LOWEST_SLAVE - 1;
c_ptr->highest_node = 0;
z_ptr = tipc_zone_find(tipc_zone(addr));
- if (z_ptr == NULL) {
+ if (!z_ptr) {
z_ptr = tipc_zone_create(addr);
}
- if (z_ptr != NULL) {
- tipc_zone_attach_cluster(z_ptr, c_ptr);
- c_ptr->owner = z_ptr;
- }
- else {
+ if (!z_ptr) {
+ kfree(c_ptr->nodes);
kfree(c_ptr);
- c_ptr = NULL;
+ return NULL;
}
+ tipc_zone_attach_cluster(z_ptr, c_ptr);
+ c_ptr->owner = z_ptr;
return c_ptr;
}
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 48b5de2dbe60..285e1bc2d880 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -63,7 +63,7 @@ struct manager {
static struct manager mng = { 0};
-static spinlock_t config_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(config_lock);
static const void *req_tlv_area; /* request message TLV area */
static int req_tlv_space; /* request message TLV area size */
@@ -291,13 +291,22 @@ static struct sk_buff *cfg_set_own_addr(void)
if (!tipc_addr_node_valid(addr))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (node address)");
- if (tipc_own_addr)
+ if (tipc_mode == TIPC_NET_MODE)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot change node address once assigned)");
+ tipc_own_addr = addr;
+
+ /*
+ * Must release all spinlocks before calling start_net() because
+ * Linux version of TIPC calls eth_media_start() which calls
+ * register_netdevice_notifier() which may block!
+ *
+ * Temporarily releasing the lock should be harmless for non-Linux TIPC,
+ * but Linux version of eth_media_start() should really be reworked
+ * so that it can be called with spinlocks held.
+ */
spin_unlock_bh(&config_lock);
- tipc_core_stop_net();
- tipc_own_addr = addr;
tipc_core_start_net();
spin_lock_bh(&config_lock);
return tipc_cfg_reply_none();
@@ -350,50 +359,21 @@ static struct sk_buff *cfg_set_max_subscriptions(void)
static struct sk_buff *cfg_set_max_ports(void)
{
- int orig_mode;
u32 value;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
value = *(u32 *)TLV_DATA(req_tlv_area);
value = ntohl(value);
+ if (value == tipc_max_ports)
+ return tipc_cfg_reply_none();
if (value != delimit(value, 127, 65535))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max ports must be 127-65535)");
-
- if (value == tipc_max_ports)
- return tipc_cfg_reply_none();
-
- if (atomic_read(&tipc_user_count) > 2)
+ if (tipc_mode != TIPC_NOT_RUNNING)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change max ports while TIPC users exist)");
-
- spin_unlock_bh(&config_lock);
- orig_mode = tipc_get_mode();
- if (orig_mode == TIPC_NET_MODE)
- tipc_core_stop_net();
- tipc_core_stop();
+ " (cannot change max ports while TIPC is active)");
tipc_max_ports = value;
- tipc_core_start();
- if (orig_mode == TIPC_NET_MODE)
- tipc_core_start_net();
- spin_lock_bh(&config_lock);
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *set_net_max(int value, int *parameter)
-{
- int orig_mode;
-
- if (value != *parameter) {
- orig_mode = tipc_get_mode();
- if (orig_mode == TIPC_NET_MODE)
- tipc_core_stop_net();
- *parameter = value;
- if (orig_mode == TIPC_NET_MODE)
- tipc_core_start_net();
- }
-
return tipc_cfg_reply_none();
}
@@ -405,10 +385,16 @@ static struct sk_buff *cfg_set_max_zones(void)
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
value = *(u32 *)TLV_DATA(req_tlv_area);
value = ntohl(value);
+ if (value == tipc_max_zones)
+ return tipc_cfg_reply_none();
if (value != delimit(value, 1, 255))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max zones must be 1-255)");
- return set_net_max(value, &tipc_max_zones);
+ if (tipc_mode == TIPC_NET_MODE)
+ return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+ " (cannot change max zones once TIPC has joined a network)");
+ tipc_max_zones = value;
+ return tipc_cfg_reply_none();
}
static struct sk_buff *cfg_set_max_clusters(void)
@@ -419,8 +405,8 @@ static struct sk_buff *cfg_set_max_clusters(void)
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
value = *(u32 *)TLV_DATA(req_tlv_area);
value = ntohl(value);
- if (value != 1)
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+ if (value != delimit(value, 1, 1))
+ return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max clusters fixed at 1)");
return tipc_cfg_reply_none();
}
@@ -433,10 +419,16 @@ static struct sk_buff *cfg_set_max_nodes(void)
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
value = *(u32 *)TLV_DATA(req_tlv_area);
value = ntohl(value);
+ if (value == tipc_max_nodes)
+ return tipc_cfg_reply_none();
if (value != delimit(value, 8, 2047))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max nodes must be 8-2047)");
- return set_net_max(value, &tipc_max_nodes);
+ if (tipc_mode == TIPC_NET_MODE)
+ return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+ " (cannot change max nodes once TIPC has joined a network)");
+ tipc_max_nodes = value;
+ return tipc_cfg_reply_none();
}
static struct sk_buff *cfg_set_max_slaves(void)
@@ -461,15 +453,16 @@ static struct sk_buff *cfg_set_netid(void)
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
value = *(u32 *)TLV_DATA(req_tlv_area);
value = ntohl(value);
+ if (value == tipc_net_id)
+ return tipc_cfg_reply_none();
if (value != delimit(value, 1, 9999))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network id must be 1-9999)");
-
- if (tipc_own_addr)
+ if (tipc_mode == TIPC_NET_MODE)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change network id once part of network)");
-
- return set_net_max(value, &tipc_net_id);
+ " (cannot change network id once TIPC has joined a network)");
+ tipc_net_id = value;
+ return tipc_cfg_reply_none();
}
struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area,
@@ -649,7 +642,7 @@ static void cfg_named_msg_event(void *userdata,
if ((size < sizeof(*req_hdr)) ||
(size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
(ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
- warn("discarded invalid configuration message\n");
+ warn("Invalid configuration message discarded\n");
return;
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3d0a8ee4e1d3..0539a8362858 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
* net/tipc/core.c: TIPC module code
*
* Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,7 @@ void tipc_socket_stop(void);
int tipc_netlink_start(void);
void tipc_netlink_stop(void);
-#define MOD_NAME "tipc_start: "
+#define TIPC_MOD_VER "1.6.1"
#ifndef CONFIG_TIPC_ZONES
#define CONFIG_TIPC_ZONES 3
@@ -191,14 +191,15 @@ static int __init tipc_init(void)
int res;
tipc_log_reinit(CONFIG_TIPC_LOG);
- info("Activated (compiled " __DATE__ " " __TIME__ ")\n");
+ info("Activated (version " TIPC_MOD_VER
+ " compiled " __DATE__ " " __TIME__ ")\n");
tipc_own_addr = 0;
tipc_remote_management = 1;
tipc_max_publications = 10000;
tipc_max_subscriptions = 2000;
tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536);
- tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 511);
+ tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255);
tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1);
tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047);
tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047);
@@ -224,6 +225,7 @@ module_exit(tipc_exit);
MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication");
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(TIPC_MOD_VER);
/* Native TIPC API for kernel-space applications (see tipc.h) */
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1f2e8b27a13f..762aac2572be 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
* net/tipc/core.h: Include file for TIPC global declarations
*
* Copyright (c) 2005-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -111,10 +111,6 @@ void tipc_dump(struct print_buf*,const char *fmt, ...);
#else
-#ifndef DBG_OUTPUT
-#define DBG_OUTPUT NULL
-#endif
-
/*
* TIPC debug support not included:
* - system messages are printed to system console
@@ -129,6 +125,19 @@ void tipc_dump(struct print_buf*,const char *fmt, ...);
#define msg_dbg(msg,txt) do {} while (0)
#define dump(fmt,arg...) do {} while (0)
+
+/*
+ * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is
+ * the null print buffer. Thes ensures that any system or debug messages
+ * that are generated without using the above macros are handled correctly.
+ */
+
+#undef TIPC_OUTPUT
+#define TIPC_OUTPUT TIPC_CONS
+
+#undef DBG_OUTPUT
+#define DBG_OUTPUT NULL
+
#endif
@@ -288,7 +297,10 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
* buf_acquire - creates a TIPC message buffer
* @size: message size (including TIPC header)
*
- * Returns a new buffer. Space is reserved for a data link header.
+ * Returns a new buffer with data pointers set to the specified size.
+ *
+ * NOTE: Headroom is reserved to allow prepending of a data link header.
+ * There may also be unrequested tailroom present at the buffer's end.
*/
static inline struct sk_buff *buf_acquire(u32 size)
@@ -309,7 +321,7 @@ static inline struct sk_buff *buf_acquire(u32 size)
* buf_discard - frees a TIPC message buffer
* @skb: message buffer
*
- * Frees a new buffer. If passed NULL, just returns.
+ * Frees a message buffer. If passed NULL, just returns.
*/
static inline void buf_discard(struct sk_buff *skb)
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 26ef95d5fe38..55130655e1ed 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -41,7 +41,7 @@
#define MAX_STRING 512
static char print_string[MAX_STRING];
-static spinlock_t print_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(print_lock);
static struct print_buf cons_buf = { NULL, 0, NULL, NULL };
struct print_buf *TIPC_CONS = &cons_buf;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 92601385e5f5..ee94de92ae99 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -2,7 +2,7 @@
* net/tipc/discover.c
*
* Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -176,7 +176,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf)
n_ptr = tipc_node_create(orig);
}
if (n_ptr == NULL) {
- warn("Memory squeeze; Failed to create node\n");
return;
}
spin_lock_bh(&n_ptr->lock);
@@ -191,10 +190,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf)
}
addr = &link->media_addr;
if (memcmp(addr, &media_addr, sizeof(*addr))) {
- char addr_string[16];
-
- warn("New bearer address for %s\n",
- addr_string_fill(addr_string, orig));
+ warn("Resetting link <%s>, peer interface address changed\n",
+ link->name);
memcpy(addr, &media_addr, sizeof(*addr));
tipc_link_reset(link);
}
@@ -270,8 +267,8 @@ static void disc_timeout(struct link_req *req)
/* leave timer interval "as is" if already at a "normal" rate */
} else {
req->timer_intv *= 2;
- if (req->timer_intv > TIPC_LINK_REQ_SLOW)
- req->timer_intv = TIPC_LINK_REQ_SLOW;
+ if (req->timer_intv > TIPC_LINK_REQ_FAST)
+ req->timer_intv = TIPC_LINK_REQ_FAST;
if ((req->timer_intv == TIPC_LINK_REQ_FAST) &&
(req->bearer->nodes.count))
req->timer_intv = TIPC_LINK_REQ_SLOW;
@@ -298,7 +295,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
{
struct link_req *req;
- req = (struct link_req *)kmalloc(sizeof(*req), GFP_ATOMIC);
+ req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return NULL;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 7a252785f727..682da4a28041 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -2,7 +2,7 @@
* net/tipc/eth_media.c: Ethernet bearer support for TIPC
*
* Copyright (c) 2001-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -98,17 +98,19 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
u32 size;
if (likely(eb_ptr->bearer)) {
- size = msg_size((struct tipc_msg *)buf->data);
- skb_trim(buf, size);
- if (likely(buf->len == size)) {
- buf->next = NULL;
- tipc_recv_msg(buf, eb_ptr->bearer);
- } else {
- kfree_skb(buf);
+ if (likely(!dev->promiscuity) ||
+ !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
+ !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+ size = msg_size((struct tipc_msg *)buf->data);
+ skb_trim(buf, size);
+ if (likely(buf->len == size)) {
+ buf->next = NULL;
+ tipc_recv_msg(buf, eb_ptr->bearer);
+ return TIPC_OK;
+ }
}
- } else {
- kfree_skb(buf);
}
+ kfree_skb(buf);
return TIPC_OK;
}
@@ -125,8 +127,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
/* Find device with specified name */
- while (dev && dev->name &&
- (memcmp(dev->name, driver_name, strlen(dev->name)))) {
+ while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) {
dev = dev->next;
}
if (!dev)
@@ -252,7 +253,9 @@ int tipc_eth_media_start(void)
if (eth_started)
return -EINVAL;
- memset(&bcast_addr, 0xff, sizeof(bcast_addr));
+ bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH);
+ memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN);
+
memset(eth_bearers, 0, sizeof(eth_bearers));
res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth",
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 966f70a1b608..ae6ddf00a1aa 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -44,7 +44,7 @@ struct queue_item {
static kmem_cache_t *tipc_queue_item_cache;
static struct list_head signal_queue_head;
-static spinlock_t qitem_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(qitem_lock);
static int handler_enabled = 0;
static void process_signal_queue(unsigned long dummy);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 784b24b6d102..693f02eca6d6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2,7 +2,7 @@
* net/tipc/link.c: TIPC link code
*
* Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2004-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -417,12 +417,11 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
struct tipc_msg *msg;
char *if_name;
- l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC);
+ l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
if (!l_ptr) {
- warn("Memory squeeze; Failed to create link\n");
+ warn("Link creation failed, no memory\n");
return NULL;
}
- memset(l_ptr, 0, sizeof(*l_ptr));
l_ptr->addr = peer;
if_name = strchr(b_ptr->publ.name, ':') + 1;
@@ -469,7 +468,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
if (!pb) {
kfree(l_ptr);
- warn("Memory squeeze; Failed to create link\n");
+ warn("Link creation failed, no memory for print buffer\n");
return NULL;
}
tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
@@ -574,7 +573,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
break;
list_del_init(&p_ptr->wait_list);
p_ptr->congested_link = NULL;
- assert(p_ptr->wakeup);
spin_lock_bh(p_ptr->publ.lock);
p_ptr->publ.congested = 0;
p_ptr->wakeup(&p_ptr->publ);
@@ -691,6 +689,7 @@ void tipc_link_reset(struct link *l_ptr)
struct sk_buff *buf;
u32 prev_state = l_ptr->state;
u32 checkpoint = l_ptr->next_in_no;
+ int was_active_link = tipc_link_is_active(l_ptr);
msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1);
@@ -712,7 +711,7 @@ void tipc_link_reset(struct link *l_ptr)
tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
dbg_link_dump();
#endif
- if (tipc_node_has_active_links(l_ptr->owner) &&
+ if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
l_ptr->owner->permit_changeover) {
l_ptr->reset_checkpoint = checkpoint;
l_ptr->exp_msg_count = START_CHANGEOVER;
@@ -755,7 +754,7 @@ void tipc_link_reset(struct link *l_ptr)
static void link_activate(struct link *l_ptr)
{
- l_ptr->next_in_no = 1;
+ l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
tipc_node_link_up(l_ptr->owner, l_ptr);
tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
link_send_event(tipc_cfg_link_event, l_ptr, 1);
@@ -820,6 +819,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
break;
case RESET_MSG:
dbg_link("RES -> RR\n");
+ info("Resetting link <%s>, requested by peer\n",
+ l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
@@ -844,6 +845,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
break;
case RESET_MSG:
dbg_link("RES -> RR\n");
+ info("Resetting link <%s>, requested by peer "
+ "while probing\n", l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
@@ -875,6 +878,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
} else { /* Link has failed */
dbg_link("-> RU (%u probes unanswered)\n",
l_ptr->fsm_msg_cnt);
+ warn("Resetting link <%s>, peer not responding\n",
+ l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
@@ -982,17 +987,20 @@ static int link_bundle_buf(struct link *l_ptr,
struct tipc_msg *bundler_msg = buf_msg(bundler);
struct tipc_msg *msg = buf_msg(buf);
u32 size = msg_size(msg);
- u32 to_pos = align(msg_size(bundler_msg));
- u32 rest = link_max_pkt(l_ptr) - to_pos;
+ u32 bundle_size = msg_size(bundler_msg);
+ u32 to_pos = align(bundle_size);
+ u32 pad = to_pos - bundle_size;
if (msg_user(bundler_msg) != MSG_BUNDLER)
return 0;
if (msg_type(bundler_msg) != OPEN_MSG)
return 0;
- if (rest < align(size))
+ if (skb_tailroom(bundler) < (pad + size))
+ return 0;
+ if (link_max_pkt(l_ptr) < (to_pos + size))
return 0;
- skb_put(bundler, (to_pos - msg_size(bundler_msg)) + size);
+ skb_put(bundler, pad + size);
memcpy(bundler->data + to_pos, buf->data, size);
msg_set_size(bundler_msg, to_pos + size);
msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
@@ -1050,7 +1058,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
msg_dbg(msg, "TIPC: Congestion, throwing away\n");
buf_discard(buf);
if (imp > CONN_MANAGER) {
- warn("Resetting <%s>, send queue full", l_ptr->name);
+ warn("Resetting link <%s>, send queue full", l_ptr->name);
tipc_link_reset(l_ptr);
}
return dsz;
@@ -1135,9 +1143,13 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
if (n_ptr) {
tipc_node_lock(n_ptr);
l_ptr = n_ptr->active_links[selector & 1];
- dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
if (l_ptr) {
+ dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
res = tipc_link_send_buf(l_ptr, buf);
+ } else {
+ dbg("Attempt to send msg to unreachable node:\n");
+ msg_dbg(buf_msg(buf),">>>");
+ buf_discard(buf);
}
tipc_node_unlock(n_ptr);
} else {
@@ -1242,8 +1254,6 @@ int tipc_link_send_sections_fast(struct port *sender,
int res;
u32 selector = msg_origport(hdr) & 1;
- assert(destaddr != tipc_own_addr);
-
again:
/*
* Try building message using port's max_pkt hint.
@@ -1604,40 +1614,121 @@ void tipc_link_push_queue(struct link *l_ptr)
tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
}
+static void link_reset_all(unsigned long addr)
+{
+ struct node *n_ptr;
+ char addr_string[16];
+ u32 i;
+
+ read_lock_bh(&tipc_net_lock);
+ n_ptr = tipc_node_find((u32)addr);
+ if (!n_ptr) {
+ read_unlock_bh(&tipc_net_lock);
+ return; /* node no longer exists */
+ }
+
+ tipc_node_lock(n_ptr);
+
+ warn("Resetting all links to %s\n",
+ addr_string_fill(addr_string, n_ptr->addr));
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (n_ptr->links[i]) {
+ link_print(n_ptr->links[i], TIPC_OUTPUT,
+ "Resetting link\n");
+ tipc_link_reset(n_ptr->links[i]);
+ }
+ }
+
+ tipc_node_unlock(n_ptr);
+ read_unlock_bh(&tipc_net_lock);
+}
+
+static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+
+ warn("Retransmission failure on link <%s>\n", l_ptr->name);
+ tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>");
+
+ if (l_ptr->addr) {
+
+ /* Handle failure on standard link */
+
+ link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n");
+ tipc_link_reset(l_ptr);
+
+ } else {
+
+ /* Handle failure on broadcast link */
+
+ struct node *n_ptr;
+ char addr_string[16];
+
+ tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg));
+ tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle);
+
+ n_ptr = l_ptr->owner->next;
+ tipc_node_lock(n_ptr);
+
+ addr_string_fill(addr_string, n_ptr->addr);
+ tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string);
+ tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported);
+ tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked);
+ tipc_printf(TIPC_OUTPUT, "Last in: %u, ", n_ptr->bclink.last_in);
+ tipc_printf(TIPC_OUTPUT, "Gap after: %u, ", n_ptr->bclink.gap_after);
+ tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to);
+ tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync);
+
+ tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
+
+ tipc_node_unlock(n_ptr);
+
+ l_ptr->stale_count = 0;
+ }
+}
+
void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
u32 retransmits)
{
struct tipc_msg *msg;
+ if (!buf)
+ return;
+
+ msg = buf_msg(buf);
+
dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
- if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && buf && !skb_cloned(buf)) {
- msg_dbg(buf_msg(buf), ">NO_RETR->BCONG>");
- dbg_print_link(l_ptr, " ");
- l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf));
- l_ptr->retransm_queue_size = retransmits;
- return;
+ if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
+ if (!skb_cloned(buf)) {
+ msg_dbg(msg, ">NO_RETR->BCONG>");
+ dbg_print_link(l_ptr, " ");
+ l_ptr->retransm_queue_head = msg_seqno(msg);
+ l_ptr->retransm_queue_size = retransmits;
+ return;
+ } else {
+ /* Don't retransmit if driver already has the buffer */
+ }
+ } else {
+ /* Detect repeated retransmit failures on uncongested bearer */
+
+ if (l_ptr->last_retransmitted == msg_seqno(msg)) {
+ if (++l_ptr->stale_count > 100) {
+ link_retransmit_failure(l_ptr, buf);
+ return;
+ }
+ } else {
+ l_ptr->last_retransmitted = msg_seqno(msg);
+ l_ptr->stale_count = 1;
+ }
}
+
while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) {
msg = buf_msg(buf);
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- /* Catch if retransmissions fail repeatedly: */
- if (l_ptr->last_retransmitted == msg_seqno(msg)) {
- if (++l_ptr->stale_count > 100) {
- tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>");
- info("...Retransmitted %u times\n",
- l_ptr->stale_count);
- link_print(l_ptr, TIPC_CONS, "Resetting Link\n");
- tipc_link_reset(l_ptr);
- break;
- }
- } else {
- l_ptr->stale_count = 0;
- }
- l_ptr->last_retransmitted = msg_seqno(msg);
-
msg_dbg(buf_msg(buf), ">RETR>");
buf = buf->next;
retransmits--;
@@ -1650,6 +1741,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
return;
}
}
+
l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
}
@@ -1720,6 +1812,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
link_recv_non_seq(buf);
continue;
}
+
+ if (unlikely(!msg_short(msg) &&
+ (msg_destnode(msg) != tipc_own_addr)))
+ goto cont;
+
n_ptr = tipc_node_find(msg_prevnode(msg));
if (unlikely(!n_ptr))
goto cont;
@@ -2140,7 +2237,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
if (msg_linkprio(msg) &&
(msg_linkprio(msg) != l_ptr->priority)) {
- warn("Changing prio <%s>: %u->%u\n",
+ warn("Resetting link <%s>, priority change %u->%u\n",
l_ptr->name, l_ptr->priority, msg_linkprio(msg));
l_ptr->priority = msg_linkprio(msg);
tipc_link_reset(l_ptr); /* Enforce change to take effect */
@@ -2209,17 +2306,22 @@ void tipc_link_tunnel(struct link *l_ptr,
u32 length = msg_size(msg);
tunnel = l_ptr->owner->active_links[selector & 1];
- if (!tipc_link_is_up(tunnel))
+ if (!tipc_link_is_up(tunnel)) {
+ warn("Link changeover error, "
+ "tunnel link no longer available\n");
return;
+ }
msg_set_size(tunnel_hdr, length + INT_H_SIZE);
buf = buf_acquire(length + INT_H_SIZE);
- if (!buf)
+ if (!buf) {
+ warn("Link changeover error, "
+ "unable to send tunnel msg\n");
return;
+ }
memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
msg_dbg(buf_msg(buf), ">SEND>");
- assert(tunnel);
tipc_link_send_buf(tunnel, buf);
}
@@ -2235,23 +2337,27 @@ void tipc_link_changeover(struct link *l_ptr)
u32 msgcount = l_ptr->out_queue_size;
struct sk_buff *crs = l_ptr->first_out;
struct link *tunnel = l_ptr->owner->active_links[0];
- int split_bundles = tipc_node_has_redundant_links(l_ptr->owner);
struct tipc_msg tunnel_hdr;
+ int split_bundles;
if (!tunnel)
return;
- if (!l_ptr->owner->permit_changeover)
+ if (!l_ptr->owner->permit_changeover) {
+ warn("Link changeover error, "
+ "peer did not permit changeover\n");
return;
+ }
msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
msg_set_msgcnt(&tunnel_hdr, msgcount);
+ dbg("Link changeover requires %u tunnel messages\n", msgcount);
+
if (!l_ptr->first_out) {
struct sk_buff *buf;
- assert(!msgcount);
buf = buf_acquire(INT_H_SIZE);
if (buf) {
memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
@@ -2261,10 +2367,15 @@ void tipc_link_changeover(struct link *l_ptr)
msg_dbg(&tunnel_hdr, "EMPTY>SEND>");
tipc_link_send_buf(tunnel, buf);
} else {
- warn("Memory squeeze; link changeover failed\n");
+ warn("Link changeover error, "
+ "unable to send changeover msg\n");
}
return;
}
+
+ split_bundles = (l_ptr->owner->active_links[0] !=
+ l_ptr->owner->active_links[1]);
+
while (crs) {
struct tipc_msg *msg = buf_msg(crs);
@@ -2310,7 +2421,8 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
outbuf = buf_acquire(length + INT_H_SIZE);
if (outbuf == NULL) {
- warn("Memory squeeze; buffer duplication failed\n");
+ warn("Link changeover error, "
+ "unable to send duplicate msg\n");
return;
}
memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
@@ -2364,11 +2476,15 @@ static int link_recv_changeover_msg(struct link **l_ptr,
u32 msg_count = msg_msgcnt(tunnel_msg);
dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)];
- assert(dest_link != *l_ptr);
if (!dest_link) {
msg_dbg(tunnel_msg, "NOLINK/<REC<");
goto exit;
}
+ if (dest_link == *l_ptr) {
+ err("Unexpected changeover message on link <%s>\n",
+ (*l_ptr)->name);
+ goto exit;
+ }
dbg("%c<-%c:", dest_link->b_ptr->net_plane,
(*l_ptr)->b_ptr->net_plane);
*l_ptr = dest_link;
@@ -2381,7 +2497,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
}
*buf = buf_extract(tunnel_buf,INT_H_SIZE);
if (*buf == NULL) {
- warn("Memory squeeze; failed to extract msg\n");
+ warn("Link changeover error, duplicate msg dropped\n");
goto exit;
}
msg_dbg(tunnel_msg, "TNL<REC<");
@@ -2393,13 +2509,17 @@ static int link_recv_changeover_msg(struct link **l_ptr,
if (tipc_link_is_up(dest_link)) {
msg_dbg(tunnel_msg, "UP/FIRST/<REC<");
+ info("Resetting link <%s>, changeover initiated by peer\n",
+ dest_link->name);
tipc_link_reset(dest_link);
dest_link->exp_msg_count = msg_count;
+ dbg("Expecting %u tunnelled messages\n", msg_count);
if (!msg_count)
goto exit;
} else if (dest_link->exp_msg_count == START_CHANGEOVER) {
msg_dbg(tunnel_msg, "BLK/FIRST/<REC<");
dest_link->exp_msg_count = msg_count;
+ dbg("Expecting %u tunnelled messages\n", msg_count);
if (!msg_count)
goto exit;
}
@@ -2407,6 +2527,8 @@ static int link_recv_changeover_msg(struct link **l_ptr,
/* Receive original message */
if (dest_link->exp_msg_count == 0) {
+ warn("Link switchover error, "
+ "got too many tunnelled messages\n");
msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<");
dbg_print_link(dest_link, "LINK:");
goto exit;
@@ -2422,7 +2544,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
buf_discard(tunnel_buf);
return 1;
} else {
- warn("Memory squeeze; dropped incoming msg\n");
+ warn("Link changeover error, original msg dropped\n");
}
}
exit:
@@ -2444,13 +2566,8 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
while (msgcount--) {
obuf = buf_extract(buf, pos);
if (obuf == NULL) {
- char addr_string[16];
-
- warn("Buffer allocation failure;\n");
- warn(" incoming message(s) from %s lost\n",
- addr_string_fill(addr_string,
- msg_orignode(buf_msg(buf))));
- return;
+ warn("Link unable to unbundle message(s)\n");
+ break;
};
pos += align(msg_size(buf_msg(obuf)));
msg_dbg(buf_msg(obuf), " /");
@@ -2508,7 +2625,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
}
fragm = buf_acquire(fragm_sz + INT_H_SIZE);
if (fragm == NULL) {
- warn("Memory squeeze; failed to fragment msg\n");
+ warn("Link unable to fragment message\n");
dsz = -ENOMEM;
goto exit;
}
@@ -2623,7 +2740,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
set_fragm_size(pbuf,fragm_sz);
set_expected_frags(pbuf,exp_fragm_cnt - 1);
} else {
- warn("Memory squeeze; got no defragmenting buffer\n");
+ warn("Link unable to reassemble fragmented message\n");
}
buf_discard(fbuf);
return 0;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index a3bbc891f959..f0b063bcc2a9 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -127,7 +127,7 @@ void tipc_named_publish(struct publication *publ)
buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
if (!buf) {
- warn("Memory squeeze; failed to distribute publication\n");
+ warn("Publication distribution failure\n");
return;
}
@@ -151,7 +151,7 @@ void tipc_named_withdraw(struct publication *publ)
buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
if (!buf) {
- warn("Memory squeeze; failed to distribute withdrawal\n");
+ warn("Withdrawl distribution failure\n");
return;
}
@@ -174,7 +174,6 @@ void tipc_named_node_up(unsigned long node)
u32 rest;
u32 max_item_buf;
- assert(in_own_cluster(node));
read_lock_bh(&tipc_nametbl_lock);
max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE;
max_item_buf *= ITEM_SIZE;
@@ -185,8 +184,8 @@ void tipc_named_node_up(unsigned long node)
left = (rest <= max_item_buf) ? rest : max_item_buf;
rest -= left;
buf = named_prepare_buf(PUBLICATION, left, node);
- if (buf == NULL) {
- warn("Memory Squeeze; could not send publication\n");
+ if (!buf) {
+ warn("Bulk publication distribution failure\n");
goto exit;
}
item = (struct distr_item *)msg_data(buf_msg(buf));
@@ -221,15 +220,24 @@ exit:
static void node_is_down(struct publication *publ)
{
struct publication *p;
+
write_lock_bh(&tipc_nametbl_lock);
dbg("node_is_down: withdrawing %u, %u, %u\n",
publ->type, publ->lower, publ->upper);
publ->key += 1222345;
p = tipc_nametbl_remove_publ(publ->type, publ->lower,
publ->node, publ->ref, publ->key);
- assert(p == publ);
write_unlock_bh(&tipc_nametbl_lock);
- kfree(publ);
+
+ if (p != publ) {
+ err("Unable to remove publication from failed node\n"
+ "(type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->ref, publ->key);
+ }
+
+ if (p) {
+ kfree(p);
+ }
}
/**
@@ -275,9 +283,15 @@ void tipc_named_recv(struct sk_buff *buf)
if (publ) {
tipc_nodesub_unsubscribe(&publ->subscr);
kfree(publ);
+ } else {
+ err("Unable to remove publication by node 0x%x\n"
+ "(type=%u, lower=%u, ref=%u, key=%u)\n",
+ msg_orignode(msg),
+ ntohl(item->type), ntohl(item->lower),
+ ntohl(item->ref), ntohl(item->key));
}
} else {
- warn("tipc_named_recv: unknown msg\n");
+ warn("Unrecognized name table message received\n");
}
item++;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index d129422fc5c2..049242ea5c38 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -71,7 +71,7 @@ struct sub_seq {
* @sseq: pointer to dynamically-sized array of sub-sequences of this 'type';
* sub-sequences are sorted in ascending order
* @alloc: number of sub-sequences currently in array
- * @first_free: upper bound of highest sub-sequence + 1
+ * @first_free: array index of first unused sub-sequence entry
* @ns_list: links to adjacent name sequences in hash chain
* @subscriptions: list of subscriptions for this 'type'
* @lock: spinlock controlling access to name sequence structure
@@ -101,7 +101,7 @@ struct name_table {
static struct name_table table = { NULL } ;
static atomic_t rsv_publ_ok = ATOMIC_INIT(0);
-rwlock_t tipc_nametbl_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(tipc_nametbl_lock);
static int hash(int x)
@@ -117,14 +117,12 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
u32 scope, u32 node, u32 port_ref,
u32 key)
{
- struct publication *publ =
- (struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC);
+ struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
if (publ == NULL) {
- warn("Memory squeeze; failed to create publication\n");
+ warn("Publication creation failure, no memory\n");
return NULL;
}
- memset(publ, 0, sizeof(*publ));
publ->type = type;
publ->lower = lower;
publ->upper = upper;
@@ -144,11 +142,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
static struct sub_seq *tipc_subseq_alloc(u32 cnt)
{
- u32 sz = cnt * sizeof(struct sub_seq);
- struct sub_seq *sseq = (struct sub_seq *)kmalloc(sz, GFP_ATOMIC);
-
- if (sseq)
- memset(sseq, 0, sz);
+ struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
return sseq;
}
@@ -160,22 +154,20 @@ static struct sub_seq *tipc_subseq_alloc(u32 cnt)
static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
{
- struct name_seq *nseq =
- (struct name_seq *)kmalloc(sizeof(*nseq), GFP_ATOMIC);
+ struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC);
struct sub_seq *sseq = tipc_subseq_alloc(1);
if (!nseq || !sseq) {
- warn("Memory squeeze; failed to create name sequence\n");
+ warn("Name sequence creation failed, no memory\n");
kfree(nseq);
kfree(sseq);
return NULL;
}
- memset(nseq, 0, sizeof(*nseq));
- nseq->lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&nseq->lock);
nseq->type = type;
nseq->sseqs = sseq;
- dbg("tipc_nameseq_create() nseq = %x type %u, ssseqs %x, ff: %u\n",
+ dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n",
nseq, type, nseq->sseqs, nseq->first_free);
nseq->alloc = 1;
INIT_HLIST_NODE(&nseq->ns_list);
@@ -253,16 +245,16 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
struct sub_seq *sseq;
int created_subseq = 0;
- assert(nseq->first_free <= nseq->alloc);
sseq = nameseq_find_subseq(nseq, lower);
- dbg("nameseq_ins: for seq %x,<%u,%u>, found sseq %x\n",
+ dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n",
nseq, type, lower, sseq);
if (sseq) {
/* Lower end overlaps existing entry => need an exact match */
if ((sseq->lower != lower) || (sseq->upper != upper)) {
- warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper);
+ warn("Cannot publish {%u,%u,%u}, overlap error\n",
+ type, lower, upper);
return NULL;
}
} else {
@@ -277,25 +269,27 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
if ((inspos < nseq->first_free) &&
(upper >= nseq->sseqs[inspos].lower)) {
- warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper);
+ warn("Cannot publish {%u,%u,%u}, overlap error\n",
+ type, lower, upper);
return NULL;
}
/* Ensure there is space for new sub-sequence */
if (nseq->first_free == nseq->alloc) {
- struct sub_seq *sseqs = nseq->sseqs;
- nseq->sseqs = tipc_subseq_alloc(nseq->alloc * 2);
- if (nseq->sseqs != NULL) {
- memcpy(nseq->sseqs, sseqs,
- nseq->alloc * sizeof (struct sub_seq));
- kfree(sseqs);
- dbg("Allocated %u sseqs\n", nseq->alloc);
- nseq->alloc *= 2;
- } else {
- warn("Memory squeeze; failed to create sub-sequence\n");
+ struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
+
+ if (!sseqs) {
+ warn("Cannot publish {%u,%u,%u}, no memory\n",
+ type, lower, upper);
return NULL;
}
+ dbg("Allocated %u more sseqs\n", nseq->alloc);
+ memcpy(sseqs, nseq->sseqs,
+ nseq->alloc * sizeof(struct sub_seq));
+ kfree(nseq->sseqs);
+ nseq->sseqs = sseqs;
+ nseq->alloc *= 2;
}
dbg("Have %u sseqs for type %u\n", nseq->alloc, type);
@@ -311,7 +305,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
sseq->upper = upper;
created_subseq = 1;
}
- dbg("inserting (%u %u %u) from %x:%u into sseq %x(%u,%u) of seq %x\n",
+ dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n",
type, lower, upper, node, port, sseq,
sseq->lower, sseq->upper, nseq);
@@ -320,7 +314,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
publ = publ_create(type, lower, upper, scope, node, port, key);
if (!publ)
return NULL;
- dbg("inserting publ %x, node=%x publ->node=%x, subscr->node=%x\n",
+ dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n",
publ, node, publ->node, publ->subscr.node);
if (!sseq->zone_list)
@@ -367,45 +361,47 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
/**
* tipc_nameseq_remove_publ -
+ *
+ * NOTE: There may be cases where TIPC is asked to remove a publication
+ * that is not in the name table. For example, if another node issues a
+ * publication for a name sequence that overlaps an existing name sequence
+ * the publication will not be recorded, which means the publication won't
+ * be found when the name sequence is later withdrawn by that node.
+ * A failed withdraw request simply returns a failure indication and lets the
+ * caller issue any error or warning messages associated with such a problem.
*/
static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst,
u32 node, u32 ref, u32 key)
{
struct publication *publ;
+ struct publication *curr;
struct publication *prev;
struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
struct sub_seq *free;
struct subscription *s, *st;
int removed_subseq = 0;
- assert(nseq);
-
- if (!sseq) {
- int i;
-
- warn("Withdraw unknown <%u,%u>?\n", nseq->type, inst);
- assert(nseq->sseqs);
- dbg("Dumping subseqs %x for %x, alloc = %u,ff=%u\n",
- nseq->sseqs, nseq, nseq->alloc,
- nseq->first_free);
- for (i = 0; i < nseq->first_free; i++) {
- dbg("Subseq %u(%x): lower = %u,upper = %u\n",
- i, &nseq->sseqs[i], nseq->sseqs[i].lower,
- nseq->sseqs[i].upper);
- }
+ if (!sseq)
return NULL;
- }
- dbg("nameseq_remove: seq: %x, sseq %x, <%u,%u> key %u\n",
+
+ dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n",
nseq, sseq, nseq->type, inst, key);
+ /* Remove publication from zone scope list */
+
prev = sseq->zone_list;
publ = sseq->zone_list->zone_list_next;
while ((publ->key != key) || (publ->ref != ref) ||
(publ->node && (publ->node != node))) {
prev = publ;
publ = publ->zone_list_next;
- assert(prev != sseq->zone_list);
+ if (prev == sseq->zone_list) {
+
+ /* Prevent endless loop if publication not found */
+
+ return NULL;
+ }
}
if (publ != sseq->zone_list)
prev->zone_list_next = publ->zone_list_next;
@@ -416,14 +412,24 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
sseq->zone_list = NULL;
}
+ /* Remove publication from cluster scope list, if present */
+
if (in_own_cluster(node)) {
prev = sseq->cluster_list;
- publ = sseq->cluster_list->cluster_list_next;
- while ((publ->key != key) || (publ->ref != ref) ||
- (publ->node && (publ->node != node))) {
- prev = publ;
- publ = publ->cluster_list_next;
- assert(prev != sseq->cluster_list);
+ curr = sseq->cluster_list->cluster_list_next;
+ while (curr != publ) {
+ prev = curr;
+ curr = curr->cluster_list_next;
+ if (prev == sseq->cluster_list) {
+
+ /* Prevent endless loop for malformed list */
+
+ err("Unable to de-list cluster publication\n"
+ "{%u%u}, node=0x%x, ref=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node,
+ publ->ref, publ->key);
+ goto end_cluster;
+ }
}
if (publ != sseq->cluster_list)
prev->cluster_list_next = publ->cluster_list_next;
@@ -434,15 +440,26 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
sseq->cluster_list = NULL;
}
}
+end_cluster:
+
+ /* Remove publication from node scope list, if present */
if (node == tipc_own_addr) {
prev = sseq->node_list;
- publ = sseq->node_list->node_list_next;
- while ((publ->key != key) || (publ->ref != ref) ||
- (publ->node && (publ->node != node))) {
- prev = publ;
- publ = publ->node_list_next;
- assert(prev != sseq->node_list);
+ curr = sseq->node_list->node_list_next;
+ while (curr != publ) {
+ prev = curr;
+ curr = curr->node_list_next;
+ if (prev == sseq->node_list) {
+
+ /* Prevent endless loop for malformed list */
+
+ err("Unable to de-list node publication\n"
+ "{%u%u}, node=0x%x, ref=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node,
+ publ->ref, publ->key);
+ goto end_node;
+ }
}
if (publ != sseq->node_list)
prev->node_list_next = publ->node_list_next;
@@ -453,22 +470,18 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
sseq->node_list = NULL;
}
}
- assert(!publ->node || (publ->node == node));
- assert(publ->ref == ref);
- assert(publ->key == key);
+end_node:
- /*
- * Contract subseq list if no more publications:
- */
- if (!sseq->node_list && !sseq->cluster_list && !sseq->zone_list) {
+ /* Contract subseq list if no more publications for that subseq */
+
+ if (!sseq->zone_list) {
free = &nseq->sseqs[nseq->first_free--];
memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq));
removed_subseq = 1;
}
- /*
- * Any subscriptions waiting ?
- */
+ /* Notify any waiting subscriptions */
+
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_subscr_report_overlap(s,
publ->lower,
@@ -478,6 +491,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
publ->node,
removed_subseq);
}
+
return publ;
}
@@ -530,7 +544,7 @@ static struct name_seq *nametbl_find_seq(u32 type)
seq_head = &table.types[hash(type)];
hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
if (ns->type == type) {
- dbg("found %x\n", ns);
+ dbg("found %p\n", ns);
return ns;
}
}
@@ -543,22 +557,21 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
{
struct name_seq *seq = nametbl_find_seq(type);
- dbg("ins_publ: <%u,%x,%x> found %x\n", type, lower, upper, seq);
+ dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq);
if (lower > upper) {
- warn("Failed to publish illegal <%u,%u,%u>\n",
+ warn("Failed to publish illegal {%u,%u,%u}\n",
type, lower, upper);
return NULL;
}
- dbg("Publishing <%u,%u,%u> from %x\n", type, lower, upper, node);
+ dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node);
if (!seq) {
seq = tipc_nameseq_create(type, &table.types[hash(type)]);
- dbg("tipc_nametbl_insert_publ: created %x\n", seq);
+ dbg("tipc_nametbl_insert_publ: created %p\n", seq);
}
if (!seq)
return NULL;
- assert(seq->type == type);
return tipc_nameseq_insert_publ(seq, type, lower, upper,
scope, node, port, key);
}
@@ -572,7 +585,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
if (!seq)
return NULL;
- dbg("Withdrawing <%u,%u> from %x\n", type, lower, node);
+ dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node);
publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
@@ -738,12 +751,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
struct publication *publ;
if (table.local_publ_count >= tipc_max_publications) {
- warn("Failed publish: max %u local publication\n",
+ warn("Publication failed, local publication limit reached (%u)\n",
tipc_max_publications);
return NULL;
}
if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) {
- warn("Failed to publish reserved name <%u,%u,%u>\n",
+ warn("Publication failed, reserved name {%u,%u,%u}\n",
type, lower, upper);
return NULL;
}
@@ -767,10 +780,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
{
struct publication *publ;
- dbg("tipc_nametbl_withdraw:<%d,%d,%d>\n", type, lower, key);
+ dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key);
write_lock_bh(&tipc_nametbl_lock);
publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
- if (publ) {
+ if (likely(publ)) {
table.local_publ_count--;
if (publ->scope != TIPC_NODE_SCOPE)
tipc_named_withdraw(publ);
@@ -780,6 +793,9 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
return 1;
}
write_unlock_bh(&tipc_nametbl_lock);
+ err("Unable to remove local publication\n"
+ "(type=%u, lower=%u, ref=%u, key=%u)\n",
+ type, lower, ref, key);
return 0;
}
@@ -787,8 +803,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void
-tipc_nametbl_subscribe(struct subscription *s)
+void tipc_nametbl_subscribe(struct subscription *s)
{
u32 type = s->seq.type;
struct name_seq *seq;
@@ -800,11 +815,13 @@ tipc_nametbl_subscribe(struct subscription *s)
}
if (seq){
spin_lock_bh(&seq->lock);
- dbg("tipc_nametbl_subscribe:found %x for <%u,%u,%u>\n",
+ dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n",
seq, type, s->seq.lower, s->seq.upper);
- assert(seq->type == type);
tipc_nameseq_subscribe(seq, s);
spin_unlock_bh(&seq->lock);
+ } else {
+ warn("Failed to create subscription for {%u,%u,%u}\n",
+ s->seq.type, s->seq.lower, s->seq.upper);
}
write_unlock_bh(&tipc_nametbl_lock);
}
@@ -813,8 +830,7 @@ tipc_nametbl_subscribe(struct subscription *s)
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void
-tipc_nametbl_unsubscribe(struct subscription *s)
+void tipc_nametbl_unsubscribe(struct subscription *s)
{
struct name_seq *seq;
@@ -1036,7 +1052,7 @@ int tipc_nametbl_init(void)
{
int array_size = sizeof(struct hlist_head) * tipc_nametbl_size;
- table.types = (struct hlist_head *)kmalloc(array_size, GFP_ATOMIC);
+ table.types = kmalloc(array_size, GFP_ATOMIC);
if (!table.types)
return -ENOMEM;
@@ -1049,35 +1065,20 @@ int tipc_nametbl_init(void)
void tipc_nametbl_stop(void)
{
- struct hlist_head *seq_head;
- struct hlist_node *seq_node;
- struct hlist_node *tmp;
- struct name_seq *seq;
u32 i;
if (!table.types)
return;
+ /* Verify name table is empty, then release it */
+
write_lock_bh(&tipc_nametbl_lock);
for (i = 0; i < tipc_nametbl_size; i++) {
- seq_head = &table.types[i];
- hlist_for_each_entry_safe(seq, seq_node, tmp, seq_head, ns_list) {
- struct sub_seq *sseq = seq->sseqs;
-
- for (; sseq != &seq->sseqs[seq->first_free]; sseq++) {
- struct publication *publ = sseq->zone_list;
- assert(publ);
- do {
- struct publication *next =
- publ->zone_list_next;
- kfree(publ);
- publ = next;
- }
- while (publ != sseq->zone_list);
- }
- }
+ if (!hlist_empty(&table.types[i]))
+ err("tipc_nametbl_stop(): hash chain %u is non-null\n", i);
}
kfree(table.types);
table.types = NULL;
write_unlock_bh(&tipc_nametbl_lock);
}
+
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f7c8223ddf7d..a991bf8a7f74 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -115,7 +115,7 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-rwlock_t tipc_net_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(tipc_net_lock);
struct network tipc_net = { NULL };
struct node *tipc_net_select_remote_node(u32 addr, u32 ref)
@@ -160,14 +160,11 @@ void tipc_net_send_external_routes(u32 dest)
static int net_init(void)
{
- u32 sz = sizeof(struct _zone *) * (tipc_max_zones + 1);
-
memset(&tipc_net, 0, sizeof(tipc_net));
- tipc_net.zones = (struct _zone **)kmalloc(sz, GFP_ATOMIC);
+ tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC);
if (!tipc_net.zones) {
return -ENOMEM;
}
- memset(tipc_net.zones, 0, sz);
return TIPC_OK;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0d5db06e203f..fc6d09630ccd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2,7 +2,7 @@
* net/tipc/node.c: TIPC node management routines
*
* Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -61,34 +61,37 @@ struct node *tipc_node_create(u32 addr)
struct node **curr_node;
n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC);
- if (n_ptr != NULL) {
- memset(n_ptr, 0, sizeof(*n_ptr));
- n_ptr->addr = addr;
- n_ptr->lock = SPIN_LOCK_UNLOCKED;
- INIT_LIST_HEAD(&n_ptr->nsub);
-
- c_ptr = tipc_cltr_find(addr);
- if (c_ptr == NULL)
- c_ptr = tipc_cltr_create(addr);
- if (c_ptr != NULL) {
- n_ptr->owner = c_ptr;
- tipc_cltr_attach_node(c_ptr, n_ptr);
- n_ptr->last_router = -1;
-
- /* Insert node into ordered list */
- for (curr_node = &tipc_nodes; *curr_node;
- curr_node = &(*curr_node)->next) {
- if (addr < (*curr_node)->addr) {
- n_ptr->next = *curr_node;
- break;
- }
- }
- (*curr_node) = n_ptr;
- } else {
- kfree(n_ptr);
- n_ptr = NULL;
- }
- }
+ if (!n_ptr) {
+ warn("Node creation failed, no memory\n");
+ return NULL;
+ }
+
+ c_ptr = tipc_cltr_find(addr);
+ if (!c_ptr) {
+ c_ptr = tipc_cltr_create(addr);
+ }
+ if (!c_ptr) {
+ kfree(n_ptr);
+ return NULL;
+ }
+
+ memset(n_ptr, 0, sizeof(*n_ptr));
+ n_ptr->addr = addr;
+ spin_lock_init(&n_ptr->lock);
+ INIT_LIST_HEAD(&n_ptr->nsub);
+ n_ptr->owner = c_ptr;
+ tipc_cltr_attach_node(c_ptr, n_ptr);
+ n_ptr->last_router = -1;
+
+ /* Insert node into ordered list */
+ for (curr_node = &tipc_nodes; *curr_node;
+ curr_node = &(*curr_node)->next) {
+ if (addr < (*curr_node)->addr) {
+ n_ptr->next = *curr_node;
+ break;
+ }
+ }
+ (*curr_node) = n_ptr;
return n_ptr;
}
@@ -122,6 +125,8 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
{
struct link **active = &n_ptr->active_links[0];
+ n_ptr->working_links++;
+
info("Established link <%s> on network plane %c\n",
l_ptr->name, l_ptr->b_ptr->net_plane);
@@ -132,7 +137,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
return;
}
if (l_ptr->priority < active[0]->priority) {
- info("Link is standby\n");
+ info("New link <%s> becomes standby\n", l_ptr->name);
return;
}
tipc_link_send_duplicate(active[0], l_ptr);
@@ -140,8 +145,9 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
active[0] = l_ptr;
return;
}
- info("Link <%s> on network plane %c becomes standby\n",
- active[0]->name, active[0]->b_ptr->net_plane);
+ info("Old link <%s> becomes standby\n", active[0]->name);
+ if (active[1] != active[0])
+ info("Old link <%s> becomes standby\n", active[1]->name);
active[0] = active[1] = l_ptr;
}
@@ -181,6 +187,8 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr)
{
struct link **active;
+ n_ptr->working_links--;
+
if (!tipc_link_is_active(l_ptr)) {
info("Lost standby link <%s> on network plane %c\n",
l_ptr->name, l_ptr->b_ptr->net_plane);
@@ -210,8 +218,7 @@ int tipc_node_has_active_links(struct node *n_ptr)
int tipc_node_has_redundant_links(struct node *n_ptr)
{
- return (tipc_node_has_active_links(n_ptr) &&
- (n_ptr->active_links[0] != n_ptr->active_links[1]));
+ return (n_ptr->working_links > 1);
}
static int tipc_node_has_active_routes(struct node *n_ptr)
@@ -234,7 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
u32 bearer_id = l_ptr->b_ptr->identity;
char addr_string[16];
- assert(bearer_id < MAX_BEARERS);
if (n_ptr->link_cnt >= 2) {
char addr_string[16];
@@ -249,7 +255,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
n_ptr->link_cnt++;
return n_ptr;
}
- err("Attempt to establish second link on <%s> to <%s> \n",
+ err("Attempt to establish second link on <%s> to %s \n",
l_ptr->b_ptr->publ.name,
addr_string_fill(addr_string, l_ptr->addr));
}
@@ -314,7 +320,7 @@ static void node_established_contact(struct node *n_ptr)
struct cluster *c_ptr;
dbg("node_established_contact:-> %x\n", n_ptr->addr);
- if (!tipc_node_has_active_routes(n_ptr)) {
+ if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) {
tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
}
@@ -586,6 +592,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
struct sk_buff *buf;
struct node *n_ptr;
struct tipc_node_info node_info;
+ u32 payload_size;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -602,8 +609,11 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
/* For now, get space for all other nodes
(will need to modify this when slave nodes are supported */
- buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(node_info)) *
- (tipc_max_nodes - 1));
+ payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
+ if (payload_size > 32768u)
+ return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+ " (too many nodes)");
+ buf = tipc_cfg_reply_alloc(payload_size);
if (!buf)
return NULL;
@@ -627,6 +637,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
struct sk_buff *buf;
struct node *n_ptr;
struct tipc_link_info link_info;
+ u32 payload_size;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -639,12 +650,15 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
if (!tipc_nodes)
return tipc_cfg_reply_none();
-
- /* For now, get space for 2 links to all other nodes + bcast link
- (will need to modify this when slave nodes are supported */
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(link_info)) *
- (2 * (tipc_max_nodes - 1) + 1));
+
+ /* Get space for all unicast links + multicast link */
+
+ payload_size = TLV_SPACE(sizeof(link_info)) *
+ (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
+ if (payload_size > 32768u)
+ return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+ " (too many links)");
+ buf = tipc_cfg_reply_alloc(payload_size);
if (!buf)
return NULL;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 781126e084ae..a07cc79ea637 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -51,6 +51,7 @@
* @nsub: list of "node down" subscriptions monitoring node
* @active_links: pointers to active links to node
* @links: pointers to all links to node
+ * @working_links: number of working links to node (both active and standby)
* @link_cnt: number of links to node
* @permit_changeover: non-zero if node has redundant links to this system
* @routers: bitmap (used for multicluster communication)
@@ -76,6 +77,7 @@ struct node {
struct link *active_links[2];
struct link *links[MAX_BEARERS];
int link_cnt;
+ int working_links;
int permit_changeover;
u32 routers[512/32];
int last_router;
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index cff4068cc755..cc3fff3dec4f 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -47,18 +47,19 @@
void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
void *usr_handle, net_ev_handler handle_down)
{
- node_sub->node = NULL;
- if (addr == tipc_own_addr)
+ if (addr == tipc_own_addr) {
+ node_sub->node = NULL;
return;
- if (!tipc_addr_node_valid(addr)) {
- warn("node_subscr with illegal %x\n", addr);
+ }
+
+ node_sub->node = tipc_node_find(addr);
+ if (!node_sub->node) {
+ warn("Node subscription rejected, unknown node 0x%x\n", addr);
return;
}
-
node_sub->handle_node_down = handle_down;
node_sub->usr_handle = usr_handle;
- node_sub->node = tipc_node_find(addr);
- assert(node_sub->node);
+
tipc_node_lock(node_sub->node);
list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub);
tipc_node_unlock(node_sub->node);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 67e96cb1e825..b9c8c6b9e94f 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -57,8 +57,8 @@
static struct sk_buff *msg_queue_head = NULL;
static struct sk_buff *msg_queue_tail = NULL;
-spinlock_t tipc_port_list_lock = SPIN_LOCK_UNLOCKED;
-static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(tipc_port_list_lock);
+static DEFINE_SPINLOCK(queue_lock);
static LIST_HEAD(ports);
static void port_handle_node_down(unsigned long ref);
@@ -168,7 +168,6 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
struct port_list *item = dp;
int cnt = 0;
- assert(buf);
msg = buf_msg(buf);
/* Create destination port list, if one wasn't supplied */
@@ -196,7 +195,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
if (b == NULL) {
- warn("Buffer allocation failure\n");
+ warn("Unable to deliver multicast message(s)\n");
msg_dbg(msg, "LOST:");
goto exit;
}
@@ -227,15 +226,14 @@ u32 tipc_createport_raw(void *usr_handle,
struct tipc_msg *msg;
u32 ref;
- p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC);
- if (p_ptr == NULL) {
- warn("Memory squeeze; failed to create port\n");
+ p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
+ if (!p_ptr) {
+ warn("Port creation failed, no memory\n");
return 0;
}
- memset(p_ptr, 0, sizeof(*p_ptr));
ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
if (!ref) {
- warn("Reference Table Exhausted\n");
+ warn("Port creation failed, reference table exhausted\n");
kfree(p_ptr);
return 0;
}
@@ -810,18 +808,20 @@ static void port_dispatcher_sigh(void *dummy)
void *usr_handle;
int connected;
int published;
+ u32 message_type;
struct sk_buff *next = buf->next;
struct tipc_msg *msg = buf_msg(buf);
u32 dref = msg_destport(msg);
+ message_type = msg_type(msg);
+ if (message_type > TIPC_DIRECT_MSG)
+ goto reject; /* Unsupported message type */
+
p_ptr = tipc_port_lock(dref);
- if (!p_ptr) {
- /* Port deleted while msg in queue */
- tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
- buf = next;
- continue;
- }
+ if (!p_ptr)
+ goto reject; /* Port deleted while msg in queue */
+
orig.ref = msg_origport(msg);
orig.node = msg_orignode(msg);
up_ptr = p_ptr->user_port;
@@ -832,7 +832,7 @@ static void port_dispatcher_sigh(void *dummy)
if (unlikely(msg_errcode(msg)))
goto err;
- switch (msg_type(msg)) {
+ switch (message_type) {
case TIPC_CONN_MSG:{
tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
@@ -874,6 +874,7 @@ static void port_dispatcher_sigh(void *dummy)
&orig);
break;
}
+ case TIPC_MCAST_MSG:
case TIPC_NAMED_MSG:{
tipc_named_msg_event cb = up_ptr->named_msg_cb;
@@ -886,7 +887,8 @@ static void port_dispatcher_sigh(void *dummy)
goto reject;
dseq.type = msg_nametype(msg);
dseq.lower = msg_nameinst(msg);
- dseq.upper = dseq.lower;
+ dseq.upper = (message_type == TIPC_NAMED_MSG)
+ ? dseq.lower : msg_nameupper(msg);
skb_pull(buf, msg_hdr_sz(msg));
cb(usr_handle, dref, &buf, msg_data(msg),
msg_data_sz(msg), msg_importance(msg),
@@ -899,7 +901,7 @@ static void port_dispatcher_sigh(void *dummy)
buf = next;
continue;
err:
- switch (msg_type(msg)) {
+ switch (message_type) {
case TIPC_CONN_MSG:{
tipc_conn_shutdown_event cb =
@@ -931,6 +933,7 @@ err:
msg_data_sz(msg), msg_errcode(msg), &orig);
break;
}
+ case TIPC_MCAST_MSG:
case TIPC_NAMED_MSG:{
tipc_named_msg_err_event cb =
up_ptr->named_err_cb;
@@ -940,7 +943,8 @@ err:
break;
dseq.type = msg_nametype(msg);
dseq.lower = msg_nameinst(msg);
- dseq.upper = dseq.lower;
+ dseq.upper = (message_type == TIPC_NAMED_MSG)
+ ? dseq.lower : msg_nameupper(msg);
skb_pull(buf, msg_hdr_sz(msg));
cb(usr_handle, dref, &buf, msg_data(msg),
msg_data_sz(msg), msg_errcode(msg), &dseq);
@@ -1053,8 +1057,9 @@ int tipc_createport(u32 user_ref,
struct port *p_ptr;
u32 ref;
- up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
- if (up_ptr == NULL) {
+ up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
+ if (!up_ptr) {
+ warn("Port creation failed, no memory\n");
return -ENOMEM;
}
ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance);
@@ -1165,8 +1170,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
p_ptr = tipc_port_lock(ref);
if (!p_ptr)
return -EINVAL;
- if (!p_ptr->publ.published)
- goto exit;
if (!seq) {
list_for_each_entry_safe(publ, tpubl,
&p_ptr->publications, pport_list) {
@@ -1193,7 +1196,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
}
if (list_empty(&p_ptr->publications))
p_ptr->publ.published = 0;
-exit:
tipc_port_unlock(p_ptr);
return res;
}
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 33bbf5095094..e6d6ae22ea49 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -63,7 +63,7 @@
struct ref_table tipc_ref_table = { NULL };
-static rwlock_t ref_table_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(ref_table_lock);
/**
* tipc_ref_table_init - create reference table for objects
@@ -79,7 +79,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
while (sz < requested_size) {
sz <<= 1;
}
- table = (struct reference *)vmalloc(sz * sizeof(struct reference));
+ table = vmalloc(sz * sizeof(*table));
if (table == NULL)
return -ENOMEM;
@@ -87,7 +87,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
index_mask = sz - 1;
for (i = sz - 1; i >= 0; i--) {
table[i].object = NULL;
- table[i].lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&table[i].lock);
table[i].data.next_plus_upper = (start & ~index_mask) + i - 1;
}
tipc_ref_table.entries = table;
@@ -127,7 +127,14 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
u32 next_plus_upper;
u32 reference = 0;
- assert(tipc_ref_table.entries && object);
+ if (!object) {
+ err("Attempt to acquire reference to non-existent object\n");
+ return 0;
+ }
+ if (!tipc_ref_table.entries) {
+ err("Reference table not found during acquisition attempt\n");
+ return 0;
+ }
write_lock_bh(&ref_table_lock);
if (tipc_ref_table.first_free) {
@@ -162,15 +169,28 @@ void tipc_ref_discard(u32 ref)
u32 index;
u32 index_mask;
- assert(tipc_ref_table.entries);
- assert(ref != 0);
+ if (!ref) {
+ err("Attempt to discard reference 0\n");
+ return;
+ }
+ if (!tipc_ref_table.entries) {
+ err("Reference table not found during discard attempt\n");
+ return;
+ }
write_lock_bh(&ref_table_lock);
index_mask = tipc_ref_table.index_mask;
index = ref & index_mask;
entry = &(tipc_ref_table.entries[index]);
- assert(entry->object != 0);
- assert(entry->data.reference == ref);
+
+ if (!entry->object) {
+ err("Attempt to discard reference to non-existent object\n");
+ goto exit;
+ }
+ if (entry->data.reference != ref) {
+ err("Attempt to discard non-existent reference\n");
+ goto exit;
+ }
/* mark entry as unused */
entry->object = NULL;
@@ -184,6 +204,7 @@ void tipc_ref_discard(u32 ref)
/* increment upper bits of entry to invalidate subsequent references */
entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1);
+exit:
write_unlock_bh(&ref_table_lock);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 648a734e6044..32d778448a00 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -169,12 +169,6 @@ static int tipc_create(struct socket *sock, int protocol)
struct sock *sk;
u32 ref;
- if ((sock->type != SOCK_STREAM) &&
- (sock->type != SOCK_SEQPACKET) &&
- (sock->type != SOCK_DGRAM) &&
- (sock->type != SOCK_RDM))
- return -EPROTOTYPE;
-
if (unlikely(protocol != 0))
return -EPROTONOSUPPORT;
@@ -199,6 +193,9 @@ static int tipc_create(struct socket *sock, int protocol)
sock->ops = &msg_ops;
sock->state = SS_READY;
break;
+ default:
+ tipc_deleteport(ref);
+ return -EPROTOTYPE;
}
sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
@@ -426,7 +423,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
return -EFAULT;
- if ((ntohs(hdr.tcm_type) & 0xC000) & (!capable(CAP_NET_ADMIN)))
+ if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
return -EACCES;
return 0;
@@ -437,7 +434,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
* @iocb: (unused)
* @sock: socket structure
* @m: message to send
- * @total_len: (unused)
+ * @total_len: length of message
*
* Message must have an destination specified explicitly.
* Used for SOCK_RDM and SOCK_DGRAM messages,
@@ -458,7 +455,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
if (unlikely(!dest))
return -EDESTADDRREQ;
- if (unlikely(dest->family != AF_TIPC))
+ if (unlikely((m->msg_namelen < sizeof(*dest)) ||
+ (dest->family != AF_TIPC)))
return -EINVAL;
needs_conn = (sock->state != SS_READY);
@@ -470,6 +468,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
if ((tsock->p->published) ||
((sock->type == SOCK_STREAM) && (total_len != 0)))
return -EOPNOTSUPP;
+ if (dest->addrtype == TIPC_ADDR_NAME) {
+ tsock->p->conn_type = dest->addr.name.name.type;
+ tsock->p->conn_instance = dest->addr.name.name.instance;
+ }
}
if (down_interruptible(&tsock->sem))
@@ -538,7 +540,7 @@ exit:
* @iocb: (unused)
* @sock: socket structure
* @m: message to send
- * @total_len: (unused)
+ * @total_len: length of message
*
* Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
*
@@ -561,15 +563,15 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
return -ERESTARTSYS;
}
- if (unlikely(sock->state != SS_CONNECTED)) {
- if (sock->state == SS_DISCONNECTING)
- res = -EPIPE;
- else
- res = -ENOTCONN;
- goto exit;
- }
-
do {
+ if (unlikely(sock->state != SS_CONNECTED)) {
+ if (sock->state == SS_DISCONNECTING)
+ res = -EPIPE;
+ else
+ res = -ENOTCONN;
+ goto exit;
+ }
+
res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov);
if (likely(res != -ELINKCONG)) {
exit:
@@ -597,7 +599,8 @@ exit:
*
* Used for SOCK_STREAM data.
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Returns the number of bytes sent on success (or partial success),
+ * or errno if no data sent
*/
@@ -611,6 +614,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
char __user *curr_start;
int curr_left;
int bytes_to_send;
+ int bytes_sent;
int res;
if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
@@ -633,11 +637,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
* of small iovec entries into send_packet().
*/
- my_msg = *m;
- curr_iov = my_msg.msg_iov;
- curr_iovlen = my_msg.msg_iovlen;
+ curr_iov = m->msg_iov;
+ curr_iovlen = m->msg_iovlen;
my_msg.msg_iov = &my_iov;
my_msg.msg_iovlen = 1;
+ bytes_sent = 0;
while (curr_iovlen--) {
curr_start = curr_iov->iov_base;
@@ -648,16 +652,18 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
? curr_left : TIPC_MAX_USER_MSG_SIZE;
my_iov.iov_base = curr_start;
my_iov.iov_len = bytes_to_send;
- if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0)
- return res;
+ if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
+ return bytes_sent ? bytes_sent : res;
+ }
curr_left -= bytes_to_send;
curr_start += bytes_to_send;
+ bytes_sent += bytes_to_send;
}
curr_iov++;
}
- return total_len;
+ return bytes_sent;
}
/**
@@ -727,6 +733,7 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
u32 anc_data[3];
u32 err;
u32 dest_type;
+ int has_name;
int res;
if (likely(m->msg_controllen == 0))
@@ -738,10 +745,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (unlikely(err)) {
anc_data[0] = err;
anc_data[1] = msg_data_sz(msg);
- if ((res = put_cmsg(m, SOL_SOCKET, TIPC_ERRINFO, 8, anc_data)))
+ if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
return res;
if (anc_data[1] &&
- (res = put_cmsg(m, SOL_SOCKET, TIPC_RETDATA, anc_data[1],
+ (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
msg_data(msg))))
return res;
}
@@ -751,25 +758,28 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
switch (dest_type) {
case TIPC_NAMED_MSG:
+ has_name = 1;
anc_data[0] = msg_nametype(msg);
anc_data[1] = msg_namelower(msg);
anc_data[2] = msg_namelower(msg);
break;
case TIPC_MCAST_MSG:
+ has_name = 1;
anc_data[0] = msg_nametype(msg);
anc_data[1] = msg_namelower(msg);
anc_data[2] = msg_nameupper(msg);
break;
case TIPC_CONN_MSG:
+ has_name = (tport->conn_type != 0);
anc_data[0] = tport->conn_type;
anc_data[1] = tport->conn_instance;
anc_data[2] = tport->conn_instance;
break;
default:
- anc_data[0] = 0;
+ has_name = 0;
}
- if (anc_data[0] &&
- (res = put_cmsg(m, SOL_SOCKET, TIPC_DESTNAME, 12, anc_data)))
+ if (has_name &&
+ (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
return res;
return 0;
@@ -960,7 +970,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
restart:
if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
(flags & MSG_DONTWAIT))) {
- res = (sz_copied == 0) ? -EWOULDBLOCK : 0;
+ res = -EWOULDBLOCK;
goto exit;
}
@@ -1051,7 +1061,7 @@ restart:
exit:
up(&tsock->sem);
- return res ? res : sz_copied;
+ return sz_copied ? sz_copied : res;
}
/**
@@ -1236,7 +1246,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
if (sock->state == SS_READY)
return -EOPNOTSUPP;
- /* MOVE THE REST OF THIS ERROR CHECKING TO send_msg()? */
+ /* Issue Posix-compliant error code if socket is in the wrong state */
+
if (sock->state == SS_LISTENING)
return -EOPNOTSUPP;
if (sock->state == SS_CONNECTING)
@@ -1244,13 +1255,20 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
if (sock->state != SS_UNCONNECTED)
return -EISCONN;
- if ((dst->family != AF_TIPC) ||
- ((dst->addrtype != TIPC_ADDR_NAME) && (dst->addrtype != TIPC_ADDR_ID)))
+ /*
+ * Reject connection attempt using multicast address
+ *
+ * Note: send_msg() validates the rest of the address fields,
+ * so there's no need to do it here
+ */
+
+ if (dst->addrtype == TIPC_ADDR_MCAST)
return -EINVAL;
/* Send a 'SYN-' to destination */
m.msg_name = dest;
+ m.msg_namelen = destlen;
if ((res = send_msg(NULL, sock, &m, 0)) < 0) {
sock->state = SS_DISCONNECTING;
return res;
@@ -1269,10 +1287,6 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
msg = buf_msg(buf);
res = auto_connect(sock, tsock, msg);
if (!res) {
- if (dst->addrtype == TIPC_ADDR_NAME) {
- tsock->p->conn_type = dst->addr.name.name.type;
- tsock->p->conn_instance = dst->addr.name.name.instance;
- }
if (!msg_data_sz(msg))
advance_queue(tsock);
}
@@ -1386,7 +1400,7 @@ exit:
/**
* shutdown - shutdown socket connection
* @sock: socket structure
- * @how: direction to close (always treated as read + write)
+ * @how: direction to close (unused; always treated as read + write)
*
* Terminates connection (if necessary), then purges socket's receive queue.
*
@@ -1469,7 +1483,8 @@ restart:
* Returns 0 on success, errno otherwise
*/
-static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol)
+static int setsockopt(struct socket *sock,
+ int lvl, int opt, char __user *ov, int ol)
{
struct tipc_sock *tsock = tipc_sk(sock->sk);
u32 value;
@@ -1525,7 +1540,8 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol)
* Returns 0 on success, errno otherwise
*/
-static int getsockopt(struct socket *sock, int lvl, int opt, char *ov, int *ol)
+static int getsockopt(struct socket *sock,
+ int lvl, int opt, char __user *ov, int *ol)
{
struct tipc_sock *tsock = tipc_sk(sock->sk);
int len;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c5f026c7fd38..c51600ba5f4a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -266,7 +266,8 @@ static void subscr_subscribe(struct tipc_subscr *s,
/* Refuse subscription if global limit exceeded */
if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
- warn("Failed: max %u subscriptions\n", tipc_max_subscriptions);
+ warn("Subscription rejected, subscription limit reached (%u)\n",
+ tipc_max_subscriptions);
subscr_terminate(subscriber);
return;
}
@@ -274,8 +275,8 @@ static void subscr_subscribe(struct tipc_subscr *s,
/* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
- if (sub == NULL) {
- warn("Memory squeeze; ignoring subscription\n");
+ if (!sub) {
+ warn("Subscription rejected, no memory\n");
subscr_terminate(subscriber);
return;
}
@@ -298,8 +299,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
if ((((sub->filter != TIPC_SUB_PORTS)
&& (sub->filter != TIPC_SUB_SERVICE)))
|| (sub->seq.lower > sub->seq.upper)) {
- warn("Rejecting illegal subscription %u,%u,%u\n",
- sub->seq.type, sub->seq.lower, sub->seq.upper);
+ warn("Subscription rejected, illegal request\n");
kfree(sub);
subscr_terminate(subscriber);
return;
@@ -387,23 +387,22 @@ static void subscr_named_msg_event(void *usr_handle,
dbg("subscr_named_msg_event: orig = %x own = %x,\n",
orig->node, tipc_own_addr);
if (size && (size != sizeof(struct tipc_subscr))) {
- warn("Received tipc_subscr of invalid size\n");
+ warn("Subscriber rejected, invalid subscription size\n");
return;
}
/* Create subscriber object */
- subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC);
+ subscriber = kzalloc(sizeof(struct subscriber), GFP_ATOMIC);
if (subscriber == NULL) {
- warn("Memory squeeze; ignoring subscriber setup\n");
+ warn("Subscriber rejected, no memory\n");
return;
}
- memset(subscriber, 0, sizeof(struct subscriber));
INIT_LIST_HEAD(&subscriber->subscription_list);
INIT_LIST_HEAD(&subscriber->subscriber_list);
subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock);
if (subscriber->ref == 0) {
- warn("Failed to acquire subscriber reference\n");
+ warn("Subscriber rejected, reference table exhausted\n");
kfree(subscriber);
return;
}
@@ -422,7 +421,7 @@ static void subscr_named_msg_event(void *usr_handle,
NULL,
&subscriber->port_ref);
if (subscriber->port_ref == 0) {
- warn("Memory squeeze; failed to create subscription port\n");
+ warn("Subscriber rejected, unable to create port\n");
tipc_ref_discard(subscriber->ref);
kfree(subscriber);
return;
@@ -457,7 +456,7 @@ int tipc_subscr_start(void)
int res = -1;
memset(&topsrv, 0, sizeof (topsrv));
- topsrv.lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&topsrv.lock);
INIT_LIST_HEAD(&topsrv.subscriber_list);
spin_lock_bh(&topsrv.lock);
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
index 3f3f933976e9..04d1b9be9c51 100644
--- a/net/tipc/user_reg.c
+++ b/net/tipc/user_reg.c
@@ -67,7 +67,7 @@ struct tipc_user {
static struct tipc_user *users = NULL;
static u32 next_free_user = MAX_USERID + 1;
-static spinlock_t reg_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(reg_lock);
/**
* reg_init - create TIPC user registry (but don't activate it)
@@ -82,9 +82,8 @@ static int reg_init(void)
spin_lock_bh(&reg_lock);
if (!users) {
- users = (struct tipc_user *)kmalloc(USER_LIST_SIZE, GFP_ATOMIC);
+ users = kzalloc(USER_LIST_SIZE, GFP_ATOMIC);
if (users) {
- memset(users, 0, USER_LIST_SIZE);
for (i = 1; i <= MAX_USERID; i++) {
users[i].next = i - 1;
}
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 2803e1b4f170..f5b00ea2d5ac 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -44,19 +44,23 @@
struct _zone *tipc_zone_create(u32 addr)
{
- struct _zone *z_ptr = NULL;
+ struct _zone *z_ptr;
u32 z_num;
- if (!tipc_addr_domain_valid(addr))
+ if (!tipc_addr_domain_valid(addr)) {
+ err("Zone creation failed, invalid domain 0x%x\n", addr);
return NULL;
+ }
- z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC);
- if (z_ptr != NULL) {
- memset(z_ptr, 0, sizeof(*z_ptr));
- z_num = tipc_zone(addr);
- z_ptr->addr = tipc_addr(z_num, 0, 0);
- tipc_net.zones[z_num] = z_ptr;
+ z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC);
+ if (!z_ptr) {
+ warn("Zone creation failed, insufficient memory\n");
+ return NULL;
}
+
+ z_num = tipc_zone(addr);
+ z_ptr->addr = tipc_addr(z_num, 0, 0);
+ tipc_net.zones[z_num] = z_ptr;
return z_ptr;
}
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
index 267999c5a240..5ab3d08602e2 100644
--- a/net/tipc/zone.h
+++ b/net/tipc/zone.h
@@ -2,7 +2,7 @@
* net/tipc/zone.h: Include file for TIPC zone management routines
*
* Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2006, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -45,7 +45,7 @@
* struct _zone - TIPC zone structure
* @addr: network address of zone
* @clusters: array of pointers to all clusters within zone
- * @links: (used for inter-zone communication)
+ * @links: number of (unicast) links to zone
*/
struct _zone {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d901465ce013..b43a27828df5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -83,7 +83,6 @@
*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -118,7 +117,7 @@
#include <net/checksum.h>
#include <linux/security.h>
-int sysctl_unix_max_dgram_qlen = 10;
+int sysctl_unix_max_dgram_qlen __read_mostly = 10;
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);
@@ -128,6 +127,24 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
+#ifdef CONFIG_SECURITY_NETWORK
+static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
+}
+
+static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->secid = *UNIXSID(skb);
+}
+#else
+static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{ }
+
+static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{ }
+#endif /* CONFIG_SECURITY_NETWORK */
+
/*
* SMP locking strategy:
* hash table is protected with spinlock unix_table_lock
@@ -542,6 +559,14 @@ static struct proto unix_proto = {
.obj_size = sizeof(struct unix_sock),
};
+/*
+ * AF_UNIX sockets do not interact with hardware, hence they
+ * dont trigger interrupts - so it's safe for them to have
+ * bh-unsafe locking for their sk_receive_queue.lock. Split off
+ * this special lock-class by reinitializing the spinlock key:
+ */
+static struct lock_class_key af_unix_sk_receive_queue_lock_key;
+
static struct sock * unix_create1(struct socket *sock)
{
struct sock *sk = NULL;
@@ -557,6 +582,8 @@ static struct sock * unix_create1(struct socket *sock)
atomic_inc(&unix_nr_socks);
sock_init_data(sock,sk);
+ lockdep_set_class(&sk->sk_receive_queue.lock,
+ &af_unix_sk_receive_queue_lock_key);
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
@@ -630,11 +657,10 @@ static int unix_autobind(struct socket *sock)
goto out;
err = -ENOMEM;
- addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
+ addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
if (!addr)
goto out;
- memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
addr->name->sun_family = AF_UNIX;
atomic_set(&addr->refcnt, 1);
@@ -1022,7 +1048,7 @@ restart:
goto out_unlock;
}
- unix_state_wlock(sk);
+ unix_state_wlock_nested(sk);
if (sk->sk_state != st) {
unix_state_wunlock(sk);
@@ -1290,6 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
if (siocb->scm->fp)
unix_attach_fds(siocb->scm, skb);
+ unix_get_secdata(siocb->scm, skb);
skb->h.raw = skb->data;
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
@@ -1570,6 +1597,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
siocb->scm->creds = *UNIXCREDS(skb);
+ unix_set_secdata(siocb->scm, skb);
if (!(flags & MSG_PEEK))
{
@@ -2032,10 +2060,7 @@ static int __init af_unix_init(void)
int rc = -1;
struct sk_buff *dummy_skb;
- if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
- printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
- goto out;
- }
+ BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index b1265187b4a8..6f39faa15832 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -32,7 +32,6 @@
*
******************************************************************************/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -371,12 +370,11 @@ static int wanpipe_listen_rcv (struct sk_buff *skb, struct sock *sk)
* used by the ioctl call to read call information
* and to execute commands.
*/
- if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
+ if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
wanpipe_kill_sock_irq (newsk);
release_device(dev);
return -ENOMEM;
}
- memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
memcpy(mbox_ptr,skb->data,skb->len);
/* Register the lcn on which incoming call came
@@ -508,11 +506,10 @@ static struct sock *wanpipe_alloc_socket(void)
if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL)
return NULL;
- if ((wan_opt = kmalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
+ if ((wan_opt = kzalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
sk_free(sk);
return NULL;
}
- memset(wan_opt, 0x00, sizeof(struct wanpipe_opt));
wp_sk(sk) = wan_opt;
@@ -2012,10 +2009,9 @@ static int set_ioctl_cmd (struct sock *sk, void *arg)
dev_put(dev);
- if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
+ if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
return -ENOMEM;
- memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
wp_sk(sk)->mbox = mbox_ptr;
wanpipe_link_driver(dev,sk);
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index c34833dc7cc1..9479659277ae 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -42,7 +42,6 @@
* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels.
*****************************************************************************/
-#include <linux/config.h>
#include <linux/stddef.h> /* offsetof(), etc. */
#include <linux/capability.h>
#include <linux/errno.h> /* return codes */
@@ -643,18 +642,16 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
if (cnf->config_id == WANCONFIG_MPPP) {
#ifdef CONFIG_WANPIPE_MULTPPP
- pppdev = kmalloc(sizeof(struct ppp_device), GFP_KERNEL);
+ pppdev = kzalloc(sizeof(struct ppp_device), GFP_KERNEL);
err = -ENOBUFS;
if (pppdev == NULL)
goto out;
- memset(pppdev, 0, sizeof(struct ppp_device));
- pppdev->dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+ pppdev->dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
if (pppdev->dev == NULL) {
kfree(pppdev);
err = -ENOBUFS;
goto out;
}
- memset(pppdev->dev, 0, sizeof(struct net_device));
err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf);
dev = pppdev->dev;
#else
@@ -664,11 +661,10 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
goto out;
#endif
} else {
- dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+ dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
err = -ENOBUFS;
if (dev == NULL)
goto out;
- memset(dev, 0, sizeof(struct net_device));
err = wandev->new_if(wandev, dev, cnf);
}
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index c28ba5a47209..930ea59463ad 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -20,7 +20,6 @@
* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE)
*****************************************************************************/
-#include <linux/config.h>
#include <linux/init.h> /* __initfunc et al. */
#include <linux/stddef.h> /* offsetof(), etc. */
#include <linux/errno.h> /* return codes */
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 282ce4e40d7b..52a2726d327f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -35,7 +35,6 @@
* response
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index adfe7b8df355..47b68a301677 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -17,7 +17,6 @@
* 2000-09-04 Henner Eisen Prevent freeing a dangling skb.
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index dfb80116c59f..a11837d361d2 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -17,7 +17,6 @@
* 2002/10/06 Arnaldo Carvalho de Melo seq_file support
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 6c5d37517035..2a3fe986b245 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -17,7 +17,6 @@
* X.25 001 Jonathan Naylor Started coding.
*/
-#include <linux/config.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <net/x25.h>
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 0c1c04322baf..0faab6332586 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -6,14 +6,24 @@ config XFRM
depends on NET
config XFRM_USER
- tristate "IPsec user configuration interface"
+ tristate "Transformation user configuration interface"
depends on INET && XFRM
---help---
- Support for IPsec user configuration interface used
- by native Linux tools.
+ Support for Transformation(XFRM) user configuration interface
+ like IPsec used by native Linux tools.
If unsure, say Y.
+config XFRM_SUB_POLICY
+ bool "Transformation sub policy support (EXPERIMENTAL)"
+ depends on XFRM && EXPERIMENTAL
+ ---help---
+ Support sub policy for developers. By using sub policy with main
+ one, two policies can be applied to the same packet at once.
+ Policy which lives shorter time in kernel should be a sub.
+
+ If unsure, say N.
+
config NET_KEY
tristate "PF_KEY sockets"
select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 693aac1aa833..de3c1a625a46 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -2,6 +2,7 @@
# Makefile for the XFRM subsystem.
#
-obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
+ xfrm_input.o xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6ed3302312fb..5a0dbeb6bbe8 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -9,7 +9,6 @@
* any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
@@ -31,7 +30,8 @@
*/
static struct xfrm_algo_desc aalg_list[] = {
{
- .name = "digest_null",
+ .name = "hmac(digest_null)",
+ .compat = "digest_null",
.uinfo = {
.auth = {
@@ -48,7 +48,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
{
- .name = "md5",
+ .name = "hmac(md5)",
+ .compat = "md5",
.uinfo = {
.auth = {
@@ -65,7 +66,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
{
- .name = "sha1",
+ .name = "hmac(sha1)",
+ .compat = "sha1",
.uinfo = {
.auth = {
@@ -82,7 +84,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
{
- .name = "sha256",
+ .name = "hmac(sha256)",
+ .compat = "sha256",
.uinfo = {
.auth = {
@@ -99,7 +102,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
{
- .name = "ripemd160",
+ .name = "hmac(ripemd160)",
+ .compat = "ripemd160",
.uinfo = {
.auth = {
@@ -119,7 +123,8 @@ static struct xfrm_algo_desc aalg_list[] = {
static struct xfrm_algo_desc ealg_list[] = {
{
- .name = "cipher_null",
+ .name = "ecb(cipher_null)",
+ .compat = "cipher_null",
.uinfo = {
.encr = {
@@ -136,7 +141,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "des",
+ .name = "cbc(des)",
+ .compat = "des",
.uinfo = {
.encr = {
@@ -153,7 +159,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "des3_ede",
+ .name = "cbc(des3_ede)",
+ .compat = "des3_ede",
.uinfo = {
.encr = {
@@ -170,7 +177,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "cast128",
+ .name = "cbc(cast128)",
+ .compat = "cast128",
.uinfo = {
.encr = {
@@ -187,7 +195,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "blowfish",
+ .name = "cbc(blowfish)",
+ .compat = "blowfish",
.uinfo = {
.encr = {
@@ -204,7 +213,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "aes",
+ .name = "cbc(aes)",
+ .compat = "aes",
.uinfo = {
.encr = {
@@ -221,7 +231,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "serpent",
+ .name = "cbc(serpent)",
+ .compat = "serpent",
.uinfo = {
.encr = {
@@ -238,7 +249,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
{
- .name = "twofish",
+ .name = "cbc(twofish)",
+ .compat = "twofish",
.uinfo = {
.encr = {
@@ -351,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
- int entries, char *name,
- int probe)
+ int entries, u32 type, u32 mask,
+ char *name, int probe)
{
int i, status;
@@ -360,7 +372,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
return NULL;
for (i = 0; i < entries; i++) {
- if (strcmp(name, list[i].name))
+ if (strcmp(name, list[i].name) &&
+ (!list[i].compat || strcmp(name, list[i].compat)))
continue;
if (list[i].available)
@@ -369,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
if (!probe)
break;
- status = crypto_alg_available(name, 0);
+ status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC);
if (!status)
break;
@@ -381,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
{
- return xfrm_get_byname(aalg_list, aalg_entries(), name, probe);
+ return xfrm_get_byname(aalg_list, aalg_entries(),
+ CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK,
+ name, probe);
}
EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
{
- return xfrm_get_byname(ealg_list, ealg_entries(), name, probe);
+ return xfrm_get_byname(ealg_list, ealg_entries(),
+ CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK,
+ name, probe);
}
EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
{
- return xfrm_get_byname(calg_list, calg_entries(), name, probe);
+ return xfrm_get_byname(calg_list, calg_entries(),
+ CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK,
+ name, probe);
}
EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
@@ -428,19 +447,22 @@ void xfrm_probe_algs(void)
BUG_ON(in_softirq());
for (i = 0; i < aalg_entries(); i++) {
- status = crypto_alg_available(aalg_list[i].name, 0);
+ status = crypto_has_hash(aalg_list[i].name, 0,
+ CRYPTO_ALG_ASYNC);
if (aalg_list[i].available != status)
aalg_list[i].available = status;
}
for (i = 0; i < ealg_entries(); i++) {
- status = crypto_alg_available(ealg_list[i].name, 0);
+ status = crypto_has_blkcipher(ealg_list[i].name, 0,
+ CRYPTO_ALG_ASYNC);
if (ealg_list[i].available != status)
ealg_list[i].available = status;
}
for (i = 0; i < calg_entries(); i++) {
- status = crypto_alg_available(calg_list[i].name, 0);
+ status = crypto_has_comp(calg_list[i].name, 0,
+ CRYPTO_ALG_ASYNC);
if (calg_list[i].available != status)
calg_list[i].available = status;
}
@@ -472,11 +494,12 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
/* Move to common area: it is shared with AH. */
-void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
- int offset, int len, icv_update_fn_t icv_update)
+int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
+ int offset, int len, icv_update_fn_t icv_update)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ int err;
struct scatterlist sg;
/* Checksum header. */
@@ -488,10 +511,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
sg.length = copy;
- icv_update(tfm, &sg, 1);
+ err = icv_update(desc, &sg, copy);
+ if (unlikely(err))
+ return err;
if ((len -= copy) == 0)
- return;
+ return 0;
offset += copy;
}
@@ -511,10 +536,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
sg.offset = frag->page_offset + offset-start;
sg.length = copy;
- icv_update(tfm, &sg, 1);
+ err = icv_update(desc, &sg, copy);
+ if (unlikely(err))
+ return err;
if (!(len -= copy))
- return;
+ return 0;
offset += copy;
}
start = end;
@@ -532,15 +559,19 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+ err = skb_icv_walk(list, desc, offset-start,
+ copy, icv_update);
+ if (unlikely(err))
+ return err;
if ((len -= copy) == 0)
- return;
+ return 0;
offset += copy;
}
start = end;
}
}
BUG_ON(len);
+ return 0;
}
EXPORT_SYMBOL_GPL(skb_icv_walk);
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
new file mode 100644
index 000000000000..37643bb8768a
--- /dev/null
+++ b/net/xfrm/xfrm_hash.c
@@ -0,0 +1,41 @@
+/* xfrm_hash.c: Common hash table code.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/xfrm.h>
+
+#include "xfrm_hash.h"
+
+struct hlist_head *xfrm_hash_alloc(unsigned int sz)
+{
+ struct hlist_head *n;
+
+ if (sz <= PAGE_SIZE)
+ n = kmalloc(sz, GFP_KERNEL);
+ else if (hashdist)
+ n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+ else
+ n = (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL, get_order(sz));
+
+ if (n)
+ memset(n, 0, sz);
+
+ return n;
+}
+
+void xfrm_hash_free(struct hlist_head *n, unsigned int sz)
+{
+ if (sz <= PAGE_SIZE)
+ kfree(n);
+ else if (hashdist)
+ vfree(n);
+ else
+ free_pages((unsigned long)n, get_order(sz));
+}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
new file mode 100644
index 000000000000..6ac4e4f033ac
--- /dev/null
+++ b/net/xfrm/xfrm_hash.h
@@ -0,0 +1,128 @@
+#ifndef _XFRM_HASH_H
+#define _XFRM_HASH_H
+
+#include <linux/xfrm.h>
+#include <linux/socket.h>
+
+static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
+{
+ return ntohl(addr->a4);
+}
+
+static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
+{
+ return ntohl(addr->a6[2] ^ addr->a6[3]);
+}
+
+static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+ return ntohl(daddr->a4 ^ saddr->a4);
+}
+
+static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+ return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+ saddr->a6[2] ^ saddr->a6[3]);
+}
+
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr,
+ u32 reqid, unsigned short family,
+ unsigned int hmask)
+{
+ unsigned int h = family ^ reqid;
+ switch (family) {
+ case AF_INET:
+ h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
+ break;
+ case AF_INET6:
+ h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
+ break;
+ }
+ return (h ^ (h >> 16)) & hmask;
+}
+
+static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
+ unsigned short family,
+ unsigned int hmask)
+{
+ unsigned int h = family;
+ switch (family) {
+ case AF_INET:
+ h ^= __xfrm4_addr_hash(saddr);
+ break;
+ case AF_INET6:
+ h ^= __xfrm6_addr_hash(saddr);
+ break;
+ };
+ return (h ^ (h >> 16)) & hmask;
+}
+
+static inline unsigned int
+__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family,
+ unsigned int hmask)
+{
+ unsigned int h = (__force u32)spi ^ proto;
+ switch (family) {
+ case AF_INET:
+ h ^= __xfrm4_addr_hash(daddr);
+ break;
+ case AF_INET6:
+ h ^= __xfrm6_addr_hash(daddr);
+ break;
+ }
+ return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
+}
+
+static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
+{
+ return (index ^ (index >> 8)) & hmask;
+}
+
+static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
+{
+ xfrm_address_t *daddr = &sel->daddr;
+ xfrm_address_t *saddr = &sel->saddr;
+ unsigned int h = 0;
+
+ switch (family) {
+ case AF_INET:
+ if (sel->prefixlen_d != 32 ||
+ sel->prefixlen_s != 32)
+ return hmask + 1;
+
+ h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+ break;
+
+ case AF_INET6:
+ if (sel->prefixlen_d != 128 ||
+ sel->prefixlen_s != 128)
+ return hmask + 1;
+
+ h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+ break;
+ };
+ h ^= (h >> 16);
+ return h & hmask;
+}
+
+static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
+{
+ unsigned int h = 0;
+
+ switch (family) {
+ case AF_INET:
+ h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+ break;
+
+ case AF_INET6:
+ h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+ break;
+ };
+ h ^= (h >> 16);
+ return h & hmask;
+}
+
+extern struct hlist_head *xfrm_hash_alloc(unsigned int sz);
+extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
+
+#endif /* _XFRM_HASH_H */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 891a6090cc09..e8198a2c785d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(secpath_dup);
/* Fetch spi and seq from ipsec header */
-int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
int offset, offset_seq;
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
case IPPROTO_COMP:
if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
return -EINVAL;
- *spi = htonl(ntohs(*(u16*)(skb->h.raw + 2)));
+ *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
*seq = 0;
return 0;
default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
if (!pskb_may_pull(skb, 16))
return -EINVAL;
- *spi = *(u32*)(skb->h.raw + offset);
- *seq = *(u32*)(skb->h.raw + offset_seq);
+ *spi = *(__be32*)(skb->h.raw + offset);
+ *seq = *(__be32*)(skb->h.raw + offset_seq);
return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);
@@ -82,8 +82,6 @@ void __init xfrm_input_init(void)
{
secpath_cachep = kmem_cache_create("secpath_cache",
sizeof(struct sec_path),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!secpath_cachep)
- panic("XFRM: failed to allocate secpath_cache\n");
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b8936926c24b..b6e2e79d7261 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -13,7 +13,6 @@
*
*/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
@@ -23,16 +22,19 @@
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/cache.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include "xfrm_hash.h"
+
DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);
static DEFINE_RWLOCK(xfrm_policy_lock);
-struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
-EXPORT_SYMBOL(xfrm_policy_list);
+unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_count);
static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -40,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
static kmem_cache_t *xfrm_dst_cache __read_mostly;
static struct work_struct xfrm_policy_gc_work;
-static struct list_head xfrm_policy_gc_list =
- LIST_HEAD_INIT(xfrm_policy_gc_list);
+static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
@@ -308,12 +309,13 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
struct xfrm_policy *policy;
- policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+ policy = kzalloc(sizeof(struct xfrm_policy), gfp);
if (policy) {
- memset(policy, 0, sizeof(struct xfrm_policy));
- atomic_set(&policy->refcnt, 1);
+ INIT_HLIST_NODE(&policy->bydst);
+ INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
+ atomic_set(&policy->refcnt, 1);
init_timer(&policy->timer);
policy->timer.data = (unsigned long)policy;
policy->timer.function = xfrm_policy_timer;
@@ -359,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
static void xfrm_policy_gc_task(void *data)
{
struct xfrm_policy *policy;
- struct list_head *entry, *tmp;
- struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+ struct hlist_node *entry, *tmp;
+ struct hlist_head gc_list;
spin_lock_bh(&xfrm_policy_gc_lock);
- list_splice_init(&xfrm_policy_gc_list, &gc_list);
+ gc_list.first = xfrm_policy_gc_list.first;
+ INIT_HLIST_HEAD(&xfrm_policy_gc_list);
spin_unlock_bh(&xfrm_policy_gc_lock);
- list_for_each_safe(entry, tmp, &gc_list) {
- policy = list_entry(entry, struct xfrm_policy, list);
+ hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
xfrm_policy_gc_kill(policy);
- }
}
/* Rule must be locked. Release descentant resources, announce
@@ -391,70 +392,275 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
}
spin_lock(&xfrm_policy_gc_lock);
- list_add(&policy->list, &xfrm_policy_gc_list);
+ hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
spin_unlock(&xfrm_policy_gc_lock);
schedule_work(&xfrm_policy_gc_work);
}
+struct xfrm_policy_hash {
+ struct hlist_head *table;
+ unsigned int hmask;
+};
+
+static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
+static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
+static struct hlist_head *xfrm_policy_byidx __read_mostly;
+static unsigned int xfrm_idx_hmask __read_mostly;
+static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+
+static inline unsigned int idx_hash(u32 index)
+{
+ return __idx_hash(index, xfrm_idx_hmask);
+}
+
+static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
+{
+ unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+ unsigned int hash = __sel_hash(sel, family, hmask);
+
+ return (hash == hmask + 1 ?
+ &xfrm_policy_inexact[dir] :
+ xfrm_policy_bydst[dir].table + hash);
+}
+
+static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
+{
+ unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+ unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
+
+ return xfrm_policy_bydst[dir].table + hash;
+}
+
+static void xfrm_dst_hash_transfer(struct hlist_head *list,
+ struct hlist_head *ndsttable,
+ unsigned int nhashmask)
+{
+ struct hlist_node *entry, *tmp;
+ struct xfrm_policy *pol;
+
+ hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
+ unsigned int h;
+
+ h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
+ pol->family, nhashmask);
+ hlist_add_head(&pol->bydst, ndsttable+h);
+ }
+}
+
+static void xfrm_idx_hash_transfer(struct hlist_head *list,
+ struct hlist_head *nidxtable,
+ unsigned int nhashmask)
+{
+ struct hlist_node *entry, *tmp;
+ struct xfrm_policy *pol;
+
+ hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
+ unsigned int h;
+
+ h = __idx_hash(pol->index, nhashmask);
+ hlist_add_head(&pol->byidx, nidxtable+h);
+ }
+}
+
+static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
+{
+ return ((old_hmask + 1) << 1) - 1;
+}
+
+static void xfrm_bydst_resize(int dir)
+{
+ unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+ unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+ unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+ struct hlist_head *odst = xfrm_policy_bydst[dir].table;
+ struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+ int i;
+
+ if (!ndst)
+ return;
+
+ write_lock_bh(&xfrm_policy_lock);
+
+ for (i = hmask; i >= 0; i--)
+ xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
+
+ xfrm_policy_bydst[dir].table = ndst;
+ xfrm_policy_bydst[dir].hmask = nhashmask;
+
+ write_unlock_bh(&xfrm_policy_lock);
+
+ xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static void xfrm_byidx_resize(int total)
+{
+ unsigned int hmask = xfrm_idx_hmask;
+ unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+ unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+ struct hlist_head *oidx = xfrm_policy_byidx;
+ struct hlist_head *nidx = xfrm_hash_alloc(nsize);
+ int i;
+
+ if (!nidx)
+ return;
+
+ write_lock_bh(&xfrm_policy_lock);
+
+ for (i = hmask; i >= 0; i--)
+ xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
+
+ xfrm_policy_byidx = nidx;
+ xfrm_idx_hmask = nhashmask;
+
+ write_unlock_bh(&xfrm_policy_lock);
+
+ xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static inline int xfrm_bydst_should_resize(int dir, int *total)
+{
+ unsigned int cnt = xfrm_policy_count[dir];
+ unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+
+ if (total)
+ *total += cnt;
+
+ if ((hmask + 1) < xfrm_policy_hashmax &&
+ cnt > hmask)
+ return 1;
+
+ return 0;
+}
+
+static inline int xfrm_byidx_should_resize(int total)
+{
+ unsigned int hmask = xfrm_idx_hmask;
+
+ if ((hmask + 1) < xfrm_policy_hashmax &&
+ total > hmask)
+ return 1;
+
+ return 0;
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+ int dir, total;
+
+ mutex_lock(&hash_resize_mutex);
+
+ total = 0;
+ for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ if (xfrm_bydst_should_resize(dir, &total))
+ xfrm_bydst_resize(dir);
+ }
+ if (xfrm_byidx_should_resize(total))
+ xfrm_byidx_resize(total);
+
+ mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+
/* Generate new index... KAME seems to generate them ordered by cost
* of an absolute inpredictability of ordering of rules. This will not pass. */
-static u32 xfrm_gen_index(int dir)
+static u32 xfrm_gen_index(u8 type, int dir)
{
- u32 idx;
- struct xfrm_policy *p;
static u32 idx_generator;
for (;;) {
+ struct hlist_node *entry;
+ struct hlist_head *list;
+ struct xfrm_policy *p;
+ u32 idx;
+ int found;
+
idx = (idx_generator | dir);
idx_generator += 8;
if (idx == 0)
idx = 8;
- for (p = xfrm_policy_list[dir]; p; p = p->next) {
- if (p->index == idx)
+ list = xfrm_policy_byidx + idx_hash(idx);
+ found = 0;
+ hlist_for_each_entry(p, entry, list, byidx) {
+ if (p->index == idx) {
+ found = 1;
break;
+ }
}
- if (!p)
+ if (!found)
return idx;
}
}
+static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
+{
+ u32 *p1 = (u32 *) s1;
+ u32 *p2 = (u32 *) s2;
+ int len = sizeof(struct xfrm_selector) / sizeof(u32);
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (p1[i] != p2[i])
+ return 1;
+ }
+
+ return 0;
+}
+
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
- struct xfrm_policy *pol, **p;
- struct xfrm_policy *delpol = NULL;
- struct xfrm_policy **newpos = NULL;
+ struct xfrm_policy *pol;
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+ struct hlist_node *entry, *newpos, *last;
struct dst_entry *gc_list;
write_lock_bh(&xfrm_policy_lock);
- for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
- if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+ chain = policy_hash_bysel(&policy->selector, policy->family, dir);
+ delpol = NULL;
+ newpos = NULL;
+ last = NULL;
+ hlist_for_each_entry(pol, entry, chain, bydst) {
+ if (!delpol &&
+ pol->type == policy->type &&
+ !selector_cmp(&pol->selector, &policy->selector) &&
xfrm_sec_ctx_match(pol->security, policy->security)) {
if (excl) {
write_unlock_bh(&xfrm_policy_lock);
return -EEXIST;
}
- *p = pol->next;
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
- p = &pol->next;
+ last = &pol->bydst;
continue;
}
if (!newpos)
- newpos = p;
+ newpos = &pol->bydst;
if (delpol)
break;
- p = &pol->next;
+ last = &pol->bydst;
}
+ if (!newpos)
+ newpos = last;
if (newpos)
- p = newpos;
+ hlist_add_after(newpos, &policy->bydst);
+ else
+ hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
- policy->next = *p;
- *p = policy;
+ xfrm_policy_count[dir]++;
atomic_inc(&flow_cache_genid);
- policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+ if (delpol) {
+ hlist_del(&delpol->bydst);
+ hlist_del(&delpol->byidx);
+ xfrm_policy_count[dir]--;
+ }
+ policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
+ hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
policy->curlft.add_time = (unsigned long)xtime.tv_sec;
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -463,10 +669,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
if (delpol)
xfrm_policy_kill(delpol);
+ else if (xfrm_bydst_should_resize(dir, NULL))
+ schedule_work(&xfrm_hash_work);
read_lock_bh(&xfrm_policy_lock);
gc_list = NULL;
- for (policy = policy->next; policy; policy = policy->next) {
+ entry = &policy->bydst;
+ hlist_for_each_entry_continue(policy, entry, bydst) {
struct dst_entry *dst;
write_lock(&policy->lock);
@@ -495,87 +704,146 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
-struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+ struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete)
{
- struct xfrm_policy *pol, **p;
+ struct xfrm_policy *pol, *ret;
+ struct hlist_head *chain;
+ struct hlist_node *entry;
write_lock_bh(&xfrm_policy_lock);
- for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
- if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
- (xfrm_sec_ctx_match(ctx, pol->security))) {
+ chain = policy_hash_bysel(sel, sel->family, dir);
+ ret = NULL;
+ hlist_for_each_entry(pol, entry, chain, bydst) {
+ if (pol->type == type &&
+ !selector_cmp(sel, &pol->selector) &&
+ xfrm_sec_ctx_match(ctx, pol->security)) {
xfrm_pol_hold(pol);
- if (delete)
- *p = pol->next;
+ if (delete) {
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
+ xfrm_policy_count[dir]--;
+ }
+ ret = pol;
break;
}
}
write_unlock_bh(&xfrm_policy_lock);
- if (pol && delete) {
+ if (ret && delete) {
atomic_inc(&flow_cache_genid);
- xfrm_policy_kill(pol);
+ xfrm_policy_kill(ret);
}
- return pol;
+ return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
{
- struct xfrm_policy *pol, **p;
+ struct xfrm_policy *pol, *ret;
+ struct hlist_head *chain;
+ struct hlist_node *entry;
write_lock_bh(&xfrm_policy_lock);
- for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
- if (pol->index == id) {
+ chain = xfrm_policy_byidx + idx_hash(id);
+ ret = NULL;
+ hlist_for_each_entry(pol, entry, chain, byidx) {
+ if (pol->type == type && pol->index == id) {
xfrm_pol_hold(pol);
- if (delete)
- *p = pol->next;
+ if (delete) {
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
+ xfrm_policy_count[dir]--;
+ }
+ ret = pol;
break;
}
}
write_unlock_bh(&xfrm_policy_lock);
- if (pol && delete) {
+ if (ret && delete) {
atomic_inc(&flow_cache_genid);
- xfrm_policy_kill(pol);
+ xfrm_policy_kill(ret);
}
- return pol;
+ return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
-void xfrm_policy_flush(void)
+void xfrm_policy_flush(u8 type)
{
- struct xfrm_policy *xp;
int dir;
write_lock_bh(&xfrm_policy_lock);
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- while ((xp = xfrm_policy_list[dir]) != NULL) {
- xfrm_policy_list[dir] = xp->next;
+ struct xfrm_policy *pol;
+ struct hlist_node *entry;
+ int i;
+
+ again1:
+ hlist_for_each_entry(pol, entry,
+ &xfrm_policy_inexact[dir], bydst) {
+ if (pol->type != type)
+ continue;
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
write_unlock_bh(&xfrm_policy_lock);
- xfrm_policy_kill(xp);
+ xfrm_policy_kill(pol);
write_lock_bh(&xfrm_policy_lock);
+ goto again1;
+ }
+
+ for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+ again2:
+ hlist_for_each_entry(pol, entry,
+ xfrm_policy_bydst[dir].table + i,
+ bydst) {
+ if (pol->type != type)
+ continue;
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
+ write_unlock_bh(&xfrm_policy_lock);
+
+ xfrm_policy_kill(pol);
+
+ write_lock_bh(&xfrm_policy_lock);
+ goto again2;
+ }
}
+
+ xfrm_policy_count[dir] = 0;
}
atomic_inc(&flow_cache_genid);
write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);
-int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
void *data)
{
- struct xfrm_policy *xp;
- int dir;
- int count = 0;
- int error = 0;
+ struct xfrm_policy *pol;
+ struct hlist_node *entry;
+ int dir, count, error;
read_lock_bh(&xfrm_policy_lock);
+ count = 0;
for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
- for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
- count++;
+ struct hlist_head *table = xfrm_policy_bydst[dir].table;
+ int i;
+
+ hlist_for_each_entry(pol, entry,
+ &xfrm_policy_inexact[dir], bydst) {
+ if (pol->type == type)
+ count++;
+ }
+ for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+ hlist_for_each_entry(pol, entry, table + i, bydst) {
+ if (pol->type == type)
+ count++;
+ }
+ }
}
if (count == 0) {
@@ -584,13 +852,28 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
}
for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
- for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
- error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+ struct hlist_head *table = xfrm_policy_bydst[dir].table;
+ int i;
+
+ hlist_for_each_entry(pol, entry,
+ &xfrm_policy_inexact[dir], bydst) {
+ if (pol->type != type)
+ continue;
+ error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
if (error)
goto out;
}
+ for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+ hlist_for_each_entry(pol, entry, table + i, bydst) {
+ if (pol->type != type)
+ continue;
+ error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
+ if (error)
+ goto out;
+ }
+ }
}
-
+ error = 0;
out:
read_unlock_bh(&xfrm_policy_lock);
return error;
@@ -599,29 +882,79 @@ EXPORT_SYMBOL(xfrm_policy_walk);
/* Find policy to apply to this flow. */
-static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
- void **objp, atomic_t **obj_refp)
+static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
+ u8 type, u16 family, int dir)
{
- struct xfrm_policy *pol;
+ struct xfrm_selector *sel = &pol->selector;
+ int match;
- read_lock_bh(&xfrm_policy_lock);
- for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
- struct xfrm_selector *sel = &pol->selector;
- int match;
+ if (pol->family != family ||
+ pol->type != type)
+ return 0;
- if (pol->family != family)
- continue;
+ match = xfrm_selector_match(sel, fl, family);
+ if (match) {
+ if (!security_xfrm_policy_lookup(pol, fl->secid, dir))
+ return 1;
+ }
- match = xfrm_selector_match(sel, fl, family);
+ return 0;
+}
- if (match) {
- if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
- xfrm_pol_hold(pol);
- break;
- }
+static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
+ u16 family, u8 dir)
+{
+ struct xfrm_policy *pol, *ret;
+ xfrm_address_t *daddr, *saddr;
+ struct hlist_node *entry;
+ struct hlist_head *chain;
+ u32 priority = ~0U;
+
+ daddr = xfrm_flowi_daddr(fl, family);
+ saddr = xfrm_flowi_saddr(fl, family);
+ if (unlikely(!daddr || !saddr))
+ return NULL;
+
+ read_lock_bh(&xfrm_policy_lock);
+ chain = policy_hash_direct(daddr, saddr, family, dir);
+ ret = NULL;
+ hlist_for_each_entry(pol, entry, chain, bydst) {
+ if (xfrm_policy_match(pol, fl, type, family, dir)) {
+ ret = pol;
+ priority = ret->priority;
+ break;
}
}
+ chain = &xfrm_policy_inexact[dir];
+ hlist_for_each_entry(pol, entry, chain, bydst) {
+ if (xfrm_policy_match(pol, fl, type, family, dir) &&
+ pol->priority < priority) {
+ ret = pol;
+ break;
+ }
+ }
+ if (ret)
+ xfrm_pol_hold(ret);
read_unlock_bh(&xfrm_policy_lock);
+
+ return ret;
+}
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+ void **objp, atomic_t **obj_refp)
+{
+ struct xfrm_policy *pol;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+ pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
+ if (pol)
+ goto end;
+#endif
+ pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+end:
+#endif
if ((*objp = (void *) pol) != NULL)
*obj_refp = &pol->refcnt;
}
@@ -643,7 +976,7 @@ static inline int policy_to_flow_dir(int dir)
};
}
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
struct xfrm_policy *pol;
@@ -654,7 +987,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
int err = 0;
if (match)
- err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+ err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir));
if (match && !err)
xfrm_pol_hold(pol);
@@ -667,24 +1000,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
- pol->next = xfrm_policy_list[dir];
- xfrm_policy_list[dir] = pol;
+ struct hlist_head *chain = policy_hash_bysel(&pol->selector,
+ pol->family, dir);
+
+ hlist_add_head(&pol->bydst, chain);
+ hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+ xfrm_policy_count[dir]++;
xfrm_pol_hold(pol);
+
+ if (xfrm_bydst_should_resize(dir, NULL))
+ schedule_work(&xfrm_hash_work);
}
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir)
{
- struct xfrm_policy **polp;
+ if (hlist_unhashed(&pol->bydst))
+ return NULL;
- for (polp = &xfrm_policy_list[dir];
- *polp != NULL; polp = &(*polp)->next) {
- if (*polp == pol) {
- *polp = pol->next;
- return pol;
- }
- }
- return NULL;
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
+ xfrm_policy_count[dir]--;
+
+ return pol;
}
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
@@ -706,12 +1044,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
struct xfrm_policy *old_pol;
+#ifdef CONFIG_XFRM_SUB_POLICY
+ if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
+ return -EINVAL;
+#endif
+
write_lock_bh(&xfrm_policy_lock);
old_pol = sk->sk_policy[dir];
sk->sk_policy[dir] = pol;
if (pol) {
pol->curlft.add_time = (unsigned long)xtime.tv_sec;
- pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+ pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
if (old_pol)
@@ -740,6 +1083,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
newp->flags = old->flags;
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
+ newp->type = old->type;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
write_lock_bh(&xfrm_policy_lock);
@@ -763,17 +1107,32 @@ int __xfrm_sk_clone_policy(struct sock *sk)
return 0;
}
+static int
+xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
+ unsigned short family)
+{
+ int err;
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ err = afinfo->get_saddr(local, remote);
+ xfrm_policy_put_afinfo(afinfo);
+ return err;
+}
+
/* Resolve list of templates for the flow, given policy. */
static int
-xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
- struct xfrm_state **xfrm,
- unsigned short family)
+xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
+ struct xfrm_state **xfrm,
+ unsigned short family)
{
int nx;
int i, error;
xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+ xfrm_address_t tmp;
for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
struct xfrm_state *x;
@@ -781,9 +1140,15 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
xfrm_address_t *local = saddr;
struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
- if (tmpl->mode) {
+ if (tmpl->mode == XFRM_MODE_TUNNEL) {
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
+ if (xfrm_addr_any(local, family)) {
+ error = xfrm_get_saddr(&tmp, remote, family);
+ if (error)
+ goto fail;
+ local = &tmp;
+ }
}
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
@@ -811,6 +1176,45 @@ fail:
return error;
}
+static int
+xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
+ struct xfrm_state **xfrm,
+ unsigned short family)
+{
+ struct xfrm_state *tp[XFRM_MAX_DEPTH];
+ struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
+ int cnx = 0;
+ int error;
+ int ret;
+ int i;
+
+ for (i = 0; i < npols; i++) {
+ if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
+ error = -ENOBUFS;
+ goto fail;
+ }
+
+ ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
+ if (ret < 0) {
+ error = ret;
+ goto fail;
+ } else
+ cnx += ret;
+ }
+
+ /* found states are sorted for outbound processing */
+ if (npols > 1)
+ xfrm_state_sort(xfrm, tpp, cnx, family);
+
+ return cnx;
+
+ fail:
+ for (cnx--; cnx>=0; cnx--)
+ xfrm_state_put(tpp[cnx]);
+ return error;
+
+}
+
/* Check that the bundle accepts the flow and its components are
* still valid.
*/
@@ -857,6 +1261,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
struct sock *sk, int flags)
{
struct xfrm_policy *policy;
+ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+ int npols;
+ int pol_dead;
+ int xfrm_nr;
+ int pi;
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct dst_entry *dst, *dst_orig = *dst_p;
int nx = 0;
@@ -864,19 +1273,26 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
u32 genid;
u16 family;
u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
- u32 sk_sid = security_sk_sid(sk, fl, dir);
+
restart:
genid = atomic_read(&flow_cache_genid);
policy = NULL;
+ for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+ pols[pi] = NULL;
+ npols = 0;
+ pol_dead = 0;
+ xfrm_nr = 0;
+
if (sk && sk->sk_policy[1])
- policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
+ policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
if (!policy) {
/* To accelerate a bit... */
- if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+ if ((dst_orig->flags & DST_NOXFRM) ||
+ !xfrm_policy_count[XFRM_POLICY_OUT])
return 0;
- policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+ policy = flow_cache_lookup(fl, dst_orig->ops->family,
dir, xfrm_policy_lookup);
}
@@ -885,6 +1301,9 @@ restart:
family = dst_orig->ops->family;
policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pols[0] = policy;
+ npols ++;
+ xfrm_nr += pols[0]->xfrm_nr;
switch (policy->action) {
case XFRM_POLICY_BLOCK:
@@ -893,11 +1312,13 @@ restart:
goto error;
case XFRM_POLICY_ALLOW:
+#ifndef CONFIG_XFRM_SUB_POLICY
if (policy->xfrm_nr == 0) {
/* Flow passes not transformed. */
xfrm_pol_put(policy);
return 0;
}
+#endif
/* Try to find matching bundle.
*
@@ -913,7 +1334,36 @@ restart:
if (dst)
break;
- nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+ if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+ pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+ fl, family,
+ XFRM_POLICY_OUT);
+ if (pols[1]) {
+ if (pols[1]->action == XFRM_POLICY_BLOCK) {
+ err = -EPERM;
+ goto error;
+ }
+ npols ++;
+ xfrm_nr += pols[1]->xfrm_nr;
+ }
+ }
+
+ /*
+ * Because neither flowi nor bundle information knows about
+ * transformation template size. On more than one policy usage
+ * we can realize whether all of them is bypass or not after
+ * they are searched. See above not-transformed bypass
+ * is surrounded by non-sub policy configuration, too.
+ */
+ if (xfrm_nr == 0) {
+ /* Flow passes not transformed. */
+ xfrm_pols_put(pols, npols);
+ return 0;
+ }
+
+#endif
+ nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
if (unlikely(nx<0)) {
err = nx;
@@ -926,7 +1376,7 @@ restart:
set_current_state(TASK_RUNNING);
remove_wait_queue(&km_waitq, &wait);
- nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+ nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
if (nx == -EAGAIN && signal_pending(current)) {
err = -ERESTART;
@@ -934,7 +1384,7 @@ restart:
}
if (nx == -EAGAIN ||
genid != atomic_read(&flow_cache_genid)) {
- xfrm_pol_put(policy);
+ xfrm_pols_put(pols, npols);
goto restart;
}
err = nx;
@@ -944,7 +1394,7 @@ restart:
}
if (nx == 0) {
/* Flow passes not transformed. */
- xfrm_pol_put(policy);
+ xfrm_pols_put(pols, npols);
return 0;
}
@@ -958,8 +1408,14 @@ restart:
goto error;
}
+ for (pi = 0; pi < npols; pi++) {
+ read_lock_bh(&pols[pi]->lock);
+ pol_dead |= pols[pi]->dead;
+ read_unlock_bh(&pols[pi]->lock);
+ }
+
write_lock_bh(&policy->lock);
- if (unlikely(policy->dead || stale_bundle(dst))) {
+ if (unlikely(pol_dead || stale_bundle(dst))) {
/* Wow! While we worked on resolving, this
* policy has gone. Retry. It is not paranoia,
* we just cannot enlist new bundle to dead object.
@@ -979,17 +1435,34 @@ restart:
}
*dst_p = dst;
dst_release(dst_orig);
- xfrm_pol_put(policy);
+ xfrm_pols_put(pols, npols);
return 0;
error:
dst_release(dst_orig);
- xfrm_pol_put(policy);
+ xfrm_pols_put(pols, npols);
*dst_p = NULL;
return err;
}
EXPORT_SYMBOL(xfrm_lookup);
+static inline int
+xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
+{
+ struct xfrm_state *x;
+ int err;
+
+ if (!skb->sp || idx < 0 || idx >= skb->sp->len)
+ return 0;
+ x = skb->sp->xvec[idx];
+ if (!x->type->reject)
+ return 0;
+ xfrm_state_hold(x);
+ err = x->type->reject(x, skb, fl);
+ xfrm_state_put(x);
+ return err;
+}
+
/* When skb is transformed back to its "native" form, we have to
* check policy restrictions. At the moment we make this in maximally
* stupid way. Shame on me. :-) Of course, connected sockets must
@@ -1006,10 +1479,19 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
x->props.mode == tmpl->mode &&
- (tmpl->aalgos & (1<<x->props.aalgo)) &&
- !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+ ((tmpl->aalgos & (1<<x->props.aalgo)) ||
+ !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
+ !(x->props.mode != XFRM_MODE_TRANSPORT &&
+ xfrm_state_addr_cmp(tmpl, x, family));
}
+/*
+ * 0 or more than 0 is returned when validation is succeeded (either bypass
+ * because of optional transport mode, or next index of the mathced secpath
+ * state with the template.
+ * -1 is returned when no matching template is found.
+ * Otherwise "-2 - errored_index" is returned.
+ */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
unsigned short family)
@@ -1017,15 +1499,18 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
int idx = start;
if (tmpl->optional) {
- if (!tmpl->mode)
+ if (tmpl->mode == XFRM_MODE_TRANSPORT)
return start;
} else
start = -1;
for (; idx < sp->len; idx++) {
if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
return ++idx;
- if (sp->xvec[idx]->props.mode)
+ if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+ if (start == -1)
+ start = -2-idx;
break;
+ }
}
return start;
}
@@ -1034,21 +1519,25 @@ int
xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ int err;
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
afinfo->decode_session(skb, fl);
+ err = security_xfrm_decode_session(skb, &fl->secid);
xfrm_policy_put_afinfo(afinfo);
- return 0;
+ return err;
}
EXPORT_SYMBOL(xfrm_decode_session);
-static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
for (; k < sp->len; k++) {
- if (sp->xvec[k]->props.mode)
+ if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
+ *idxp = k;
return 1;
+ }
}
return 0;
@@ -1058,16 +1547,18 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
unsigned short family)
{
struct xfrm_policy *pol;
+ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+ int npols = 0;
+ int xfrm_nr;
+ int pi;
struct flowi fl;
u8 fl_dir = policy_to_flow_dir(dir);
- u32 sk_sid;
+ int xerr_idx = -1;
if (xfrm_decode_session(skb, &fl, family) < 0)
return 0;
nf_nat_decode_session(skb, &fl, family);
- sk_sid = security_sk_sid(sk, &fl, fl_dir);
-
/* First, check used SA against their selectors. */
if (skb->sp) {
int i;
@@ -1081,46 +1572,90 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = NULL;
if (sk && sk->sk_policy[dir])
- pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl);
if (!pol)
- pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
+ pol = flow_cache_lookup(&fl, family, fl_dir,
xfrm_policy_lookup);
- if (!pol)
- return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+ if (!pol) {
+ if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
+ xfrm_secpath_reject(xerr_idx, skb, &fl);
+ return 0;
+ }
+ return 1;
+ }
pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pols[0] = pol;
+ npols ++;
+#ifdef CONFIG_XFRM_SUB_POLICY
+ if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+ pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+ &fl, family,
+ XFRM_POLICY_IN);
+ if (pols[1]) {
+ pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+ npols ++;
+ }
+ }
+#endif
+
if (pol->action == XFRM_POLICY_ALLOW) {
struct sec_path *sp;
static struct sec_path dummy;
+ struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
+ struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
+ struct xfrm_tmpl **tpp = tp;
+ int ti = 0;
int i, k;
if ((sp = skb->sp) == NULL)
sp = &dummy;
+ for (pi = 0; pi < npols; pi++) {
+ if (pols[pi] != pol &&
+ pols[pi]->action != XFRM_POLICY_ALLOW)
+ goto reject;
+ if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
+ goto reject_error;
+ for (i = 0; i < pols[pi]->xfrm_nr; i++)
+ tpp[ti++] = &pols[pi]->xfrm_vec[i];
+ }
+ xfrm_nr = ti;
+ if (npols > 1) {
+ xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
+ tpp = stp;
+ }
+
/* For each tunnel xfrm, find the first matching tmpl.
* For each tmpl before that, find corresponding xfrm.
* Order is _important_. Later we will implement
* some barriers, but at the moment barriers
* are implied between each two transformations.
*/
- for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
- k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
- if (k < 0)
+ for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
+ k = xfrm_policy_ok(tpp[i], sp, k, family);
+ if (k < 0) {
+ if (k < -1)
+ /* "-2 - errored_index" returned */
+ xerr_idx = -(2+k);
goto reject;
+ }
}
- if (secpath_has_tunnel(sp, k))
+ if (secpath_has_nontransport(sp, k, &xerr_idx))
goto reject;
- xfrm_pol_put(pol);
+ xfrm_pols_put(pols, npols);
return 1;
}
reject:
- xfrm_pol_put(pol);
+ xfrm_secpath_reject(xerr_idx, skb, &fl);
+reject_error:
+ xfrm_pols_put(pols, npols);
return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
@@ -1136,18 +1671,39 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
}
EXPORT_SYMBOL(__xfrm_route_forward);
+/* Optimize later using cookies and generation ids. */
+
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
- /* If it is marked obsolete, which is how we even get here,
- * then we have purged it from the policy bundle list and we
- * did that for a good reason.
+ /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
+ * to "-1" to force all XFRM destinations to get validated by
+ * dst_ops->check on every use. We do this because when a
+ * normal route referenced by an XFRM dst is obsoleted we do
+ * not go looking around for all parent referencing XFRM dsts
+ * so that we can invalidate them. It is just too much work.
+ * Instead we make the checks here on every use. For example:
+ *
+ * XFRM dst A --> IPv4 dst X
+ *
+ * X is the "xdst->route" of A (X is also the "dst->path" of A
+ * in this example). If X is marked obsolete, "A" will not
+ * notice. That's what we are validating here via the
+ * stale_bundle() check.
+ *
+ * When a policy's bundle is pruned, we dst_free() the XFRM
+ * dst which causes it's ->obsolete field to be set to a
+ * positive non-zero integer. If an XFRM dst has been pruned
+ * like this, we want to force a new route lookup.
*/
+ if (dst->obsolete < 0 && !stale_bundle(dst))
+ return dst;
+
return NULL;
}
static int stale_bundle(struct dst_entry *dst)
{
- return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+ return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -1177,33 +1733,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
+static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
+{
+ struct dst_entry *dst, **dstp;
+
+ write_lock(&pol->lock);
+ dstp = &pol->bundles;
+ while ((dst=*dstp) != NULL) {
+ if (func(dst)) {
+ *dstp = dst->next;
+ dst->next = *gc_list_p;
+ *gc_list_p = dst;
+ } else {
+ dstp = &dst->next;
+ }
+ }
+ write_unlock(&pol->lock);
+}
+
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
- int i;
- struct xfrm_policy *pol;
- struct dst_entry *dst, **dstp, *gc_list = NULL;
+ struct dst_entry *gc_list = NULL;
+ int dir;
read_lock_bh(&xfrm_policy_lock);
- for (i=0; i<2*XFRM_POLICY_MAX; i++) {
- for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
- write_lock(&pol->lock);
- dstp = &pol->bundles;
- while ((dst=*dstp) != NULL) {
- if (func(dst)) {
- *dstp = dst->next;
- dst->next = gc_list;
- gc_list = dst;
- } else {
- dstp = &dst->next;
- }
- }
- write_unlock(&pol->lock);
+ for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ struct xfrm_policy *pol;
+ struct hlist_node *entry;
+ struct hlist_head *table;
+ int i;
+
+ hlist_for_each_entry(pol, entry,
+ &xfrm_policy_inexact[dir], bydst)
+ prune_one_bundle(pol, func, &gc_list);
+
+ table = xfrm_policy_bydst[dir].table;
+ for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+ hlist_for_each_entry(pol, entry, table + i, bydst)
+ prune_one_bundle(pol, func, &gc_list);
}
}
read_unlock_bh(&xfrm_policy_lock);
while (gc_list) {
- dst = gc_list;
+ struct dst_entry *dst = gc_list;
gc_list = dst->next;
dst_free(dst);
}
@@ -1219,22 +1792,12 @@ static void __xfrm_garbage_collect(void)
xfrm_prune_bundles(unused_bundle);
}
-int xfrm_flush_bundles(void)
+static int xfrm_flush_bundles(void)
{
xfrm_prune_bundles(stale_bundle);
return 0;
}
-static int always_true(struct dst_entry *dst)
-{
- return 1;
-}
-
-void xfrm_flush_all_bundles(void)
-{
- xfrm_prune_bundles(always_true);
-}
-
void xfrm_init_pmtu(struct dst_entry *dst)
{
do {
@@ -1262,7 +1825,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu);
* still valid.
*/
-int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict)
{
struct dst_entry *dst = &first->u.dst;
struct xfrm_dst *last;
@@ -1279,8 +1842,16 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
return 0;
+ if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm))
+ return 0;
if (dst->xfrm->km.state != XFRM_STATE_VALID)
return 0;
+ if (xdst->genid != dst->xfrm->genid)
+ return 0;
+
+ if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
+ !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
+ return 0;
mtu = dst_mtu(dst->child);
if (xdst->child_mtu_cached != mtu) {
@@ -1429,12 +2000,33 @@ static struct notifier_block xfrm_dev_notifier = {
static void __init xfrm_policy_init(void)
{
+ unsigned int hmask, sz;
+ int dir;
+
xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
sizeof(struct xfrm_dst),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!xfrm_dst_cache)
- panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+ hmask = 8 - 1;
+ sz = (hmask+1) * sizeof(struct hlist_head);
+
+ xfrm_policy_byidx = xfrm_hash_alloc(sz);
+ xfrm_idx_hmask = hmask;
+ if (!xfrm_policy_byidx)
+ panic("XFRM: failed to allocate byidx hash\n");
+
+ for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ struct xfrm_policy_hash *htab;
+
+ INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
+
+ htab = &xfrm_policy_bydst[dir];
+ htab->table = xfrm_hash_alloc(sz);
+ htab->hmask = hmask;
+ if (!htab->table)
+ panic("XFRM: failed to allocate bydst hash\n");
+ }
INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
register_netdevice_notifier(&xfrm_dev_notifier);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 17b29ec3c417..f927b7330f02 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,8 +18,11 @@
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
+#include <linux/cache.h>
#include <asm/uaccess.h>
+#include "xfrm_hash.h"
+
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);
@@ -32,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
/* Each xfrm_state may be linked to two tables:
1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
- 2. Hash table by daddr to find what SAs exist for given
+ 2. Hash table by (daddr,family,reqid) to find what SAs exist for given
destination/tunnel endpoint. (output)
*/
@@ -44,8 +47,126 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
* Main use is finding SA after policy selected tunnel or transport mode.
* Also, it can be used by ah/esp icmp error handler to find offending SA.
*/
-static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+static struct hlist_head *xfrm_state_bydst __read_mostly;
+static struct hlist_head *xfrm_state_bysrc __read_mostly;
+static struct hlist_head *xfrm_state_byspi __read_mostly;
+static unsigned int xfrm_state_hmask __read_mostly;
+static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static unsigned int xfrm_state_num;
+static unsigned int xfrm_state_genid;
+
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
+ xfrm_address_t *saddr,
+ u32 reqid,
+ unsigned short family)
+{
+ return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
+}
+
+static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
+ unsigned short family)
+{
+ return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+}
+
+static inline unsigned int
+xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+{
+ return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
+}
+
+static void xfrm_hash_transfer(struct hlist_head *list,
+ struct hlist_head *ndsttable,
+ struct hlist_head *nsrctable,
+ struct hlist_head *nspitable,
+ unsigned int nhashmask)
+{
+ struct hlist_node *entry, *tmp;
+ struct xfrm_state *x;
+
+ hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
+ unsigned int h;
+
+ h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+ x->props.reqid, x->props.family,
+ nhashmask);
+ hlist_add_head(&x->bydst, ndsttable+h);
+
+ h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+ nhashmask);
+ hlist_add_head(&x->bysrc, nsrctable+h);
+
+ if (x->id.spi) {
+ h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
+ x->id.proto, x->props.family,
+ nhashmask);
+ hlist_add_head(&x->byspi, nspitable+h);
+ }
+ }
+}
+
+static unsigned long xfrm_hash_new_size(void)
+{
+ return ((xfrm_state_hmask + 1) << 1) *
+ sizeof(struct hlist_head);
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+ struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+ unsigned long nsize, osize;
+ unsigned int nhashmask, ohashmask;
+ int i;
+
+ mutex_lock(&hash_resize_mutex);
+
+ nsize = xfrm_hash_new_size();
+ ndst = xfrm_hash_alloc(nsize);
+ if (!ndst)
+ goto out_unlock;
+ nsrc = xfrm_hash_alloc(nsize);
+ if (!nsrc) {
+ xfrm_hash_free(ndst, nsize);
+ goto out_unlock;
+ }
+ nspi = xfrm_hash_alloc(nsize);
+ if (!nspi) {
+ xfrm_hash_free(ndst, nsize);
+ xfrm_hash_free(nsrc, nsize);
+ goto out_unlock;
+ }
+
+ spin_lock_bh(&xfrm_state_lock);
+
+ nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+ for (i = xfrm_state_hmask; i >= 0; i--)
+ xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
+ nhashmask);
+
+ odst = xfrm_state_bydst;
+ osrc = xfrm_state_bysrc;
+ ospi = xfrm_state_byspi;
+ ohashmask = xfrm_state_hmask;
+
+ xfrm_state_bydst = ndst;
+ xfrm_state_bysrc = nsrc;
+ xfrm_state_byspi = nspi;
+ xfrm_state_hmask = nhashmask;
+
+ spin_unlock_bh(&xfrm_state_lock);
+
+ osize = (ohashmask + 1) * sizeof(struct hlist_head);
+ xfrm_hash_free(odst, osize);
+ xfrm_hash_free(osrc, osize);
+ xfrm_hash_free(ospi, osize);
+
+out_unlock:
+ mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);
@@ -54,11 +175,9 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
static struct work_struct xfrm_state_gc_work;
-static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
-static int xfrm_state_gc_flush_bundles;
-
int __xfrm_state_delete(struct xfrm_state *x);
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -69,14 +188,13 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
- if (del_timer(&x->timer))
- BUG();
- if (del_timer(&x->rtimer))
- BUG();
+ del_timer_sync(&x->timer);
+ del_timer_sync(&x->rtimer);
kfree(x->aalg);
kfree(x->ealg);
kfree(x->calg);
kfree(x->encap);
+ kfree(x->coaddr);
if (x->mode)
xfrm_put_mode(x->mode);
if (x->type) {
@@ -90,22 +208,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
static void xfrm_state_gc_task(void *data)
{
struct xfrm_state *x;
- struct list_head *entry, *tmp;
- struct list_head gc_list = LIST_HEAD_INIT(gc_list);
-
- if (xfrm_state_gc_flush_bundles) {
- xfrm_state_gc_flush_bundles = 0;
- xfrm_flush_bundles();
- }
+ struct hlist_node *entry, *tmp;
+ struct hlist_head gc_list;
spin_lock_bh(&xfrm_state_gc_lock);
- list_splice_init(&xfrm_state_gc_list, &gc_list);
+ gc_list.first = xfrm_state_gc_list.first;
+ INIT_HLIST_HEAD(&xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
- list_for_each_safe(entry, tmp, &gc_list) {
- x = list_entry(entry, struct xfrm_state, bydst);
+ hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
xfrm_state_gc_destroy(x);
- }
+
wake_up(&km_waitq);
}
@@ -168,9 +281,9 @@ static void xfrm_timer_handler(unsigned long data)
if (warn)
km_state_expired(x, 0, 0);
resched:
- if (next != LONG_MAX &&
- !mod_timer(&x->timer, jiffies + make_jiffies(next)))
- xfrm_state_hold(x);
+ if (next != LONG_MAX)
+ mod_timer(&x->timer, jiffies + make_jiffies(next));
+
goto out;
expired:
@@ -185,7 +298,6 @@ expired:
out:
spin_unlock(&x->lock);
- xfrm_state_put(x);
}
static void xfrm_replay_timer_handler(unsigned long data);
@@ -194,14 +306,14 @@ struct xfrm_state *xfrm_state_alloc(void)
{
struct xfrm_state *x;
- x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+ x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
if (x) {
- memset(x, 0, sizeof(struct xfrm_state));
atomic_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
- INIT_LIST_HEAD(&x->bydst);
- INIT_LIST_HEAD(&x->byspi);
+ INIT_HLIST_NODE(&x->bydst);
+ INIT_HLIST_NODE(&x->bysrc);
+ INIT_HLIST_NODE(&x->byspi);
init_timer(&x->timer);
x->timer.function = xfrm_timer_handler;
x->timer.data = (unsigned long)x;
@@ -226,7 +338,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
spin_lock_bh(&xfrm_state_gc_lock);
- list_add(&x->bydst, &xfrm_state_gc_list);
+ hlist_add_head(&x->bydst, &xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
schedule_work(&xfrm_state_gc_work);
}
@@ -239,27 +351,12 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
spin_lock(&xfrm_state_lock);
- list_del(&x->bydst);
- __xfrm_state_put(x);
- if (x->id.spi) {
- list_del(&x->byspi);
- __xfrm_state_put(x);
- }
+ hlist_del(&x->bydst);
+ hlist_del(&x->bysrc);
+ if (x->id.spi)
+ hlist_del(&x->byspi);
+ xfrm_state_num--;
spin_unlock(&xfrm_state_lock);
- if (del_timer(&x->timer))
- __xfrm_state_put(x);
- if (del_timer(&x->rtimer))
- __xfrm_state_put(x);
-
- /* The number two in this test is the reference
- * mentioned in the comment below plus the reference
- * our caller holds. A larger value means that
- * there are DSTs attached to this xfrm_state.
- */
- if (atomic_read(&x->refcnt) > 2) {
- xfrm_state_gc_flush_bundles = 1;
- schedule_work(&xfrm_state_gc_work);
- }
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
@@ -288,14 +385,15 @@ EXPORT_SYMBOL(xfrm_state_delete);
void xfrm_state_flush(u8 proto)
{
int i;
- struct xfrm_state *x;
spin_lock_bh(&xfrm_state_lock);
- for (i = 0; i < XFRM_DST_HSIZE; i++) {
+ for (i = 0; i <= xfrm_state_hmask; i++) {
+ struct hlist_node *entry;
+ struct xfrm_state *x;
restart:
- list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
if (!xfrm_state_kern(x) &&
- (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+ xfrm_id_proto_match(x->id.proto, proto)) {
xfrm_state_hold(x);
spin_unlock_bh(&xfrm_state_lock);
@@ -326,29 +424,103 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
return 0;
}
+static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+{
+ unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
+ struct xfrm_state *x;
+ struct hlist_node *entry;
+
+ hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto)
+ continue;
+
+ switch (family) {
+ case AF_INET:
+ if (x->id.daddr.a4 != daddr->a4)
+ continue;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal((struct in6_addr *)daddr,
+ (struct in6_addr *)
+ x->id.daddr.a6))
+ continue;
+ break;
+ };
+
+ xfrm_state_hold(x);
+ return x;
+ }
+
+ return NULL;
+}
+
+static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+{
+ unsigned int h = xfrm_src_hash(saddr, family);
+ struct xfrm_state *x;
+ struct hlist_node *entry;
+
+ hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+ if (x->props.family != family ||
+ x->id.proto != proto)
+ continue;
+
+ switch (family) {
+ case AF_INET:
+ if (x->id.daddr.a4 != daddr->a4 ||
+ x->props.saddr.a4 != saddr->a4)
+ continue;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal((struct in6_addr *)daddr,
+ (struct in6_addr *)
+ x->id.daddr.a6) ||
+ !ipv6_addr_equal((struct in6_addr *)saddr,
+ (struct in6_addr *)
+ x->props.saddr.a6))
+ continue;
+ break;
+ };
+
+ xfrm_state_hold(x);
+ return x;
+ }
+
+ return NULL;
+}
+
+static inline struct xfrm_state *
+__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
+{
+ if (use_spi)
+ return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
+ x->id.proto, family);
+ else
+ return __xfrm_state_lookup_byaddr(&x->id.daddr,
+ &x->props.saddr,
+ x->id.proto, family);
+}
+
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
struct flowi *fl, struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err,
unsigned short family)
{
- unsigned h = xfrm_dst_hash(daddr, family);
+ unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
+ struct hlist_node *entry;
struct xfrm_state *x, *x0;
int acquire_in_progress = 0;
int error = 0;
struct xfrm_state *best = NULL;
- struct xfrm_state_afinfo *afinfo;
- afinfo = xfrm_state_get_afinfo(family);
- if (afinfo == NULL) {
- *err = -EAFNOSUPPORT;
- return NULL;
- }
-
spin_lock_bh(&xfrm_state_lock);
- list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == tmpl->reqid &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, family) &&
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
@@ -368,7 +540,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
*/
if (x->km.state == XFRM_STATE_VALID) {
if (!xfrm_selector_match(&x->sel, fl, family) ||
- !xfrm_sec_ctx_match(pol->security, x->security))
+ !security_xfrm_state_pol_flow_match(x, pol, fl))
continue;
if (!best ||
best->km.dying > x->km.dying ||
@@ -380,7 +552,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
} else if (x->km.state == XFRM_STATE_ERROR ||
x->km.state == XFRM_STATE_EXPIRED) {
if (xfrm_selector_match(&x->sel, fl, family) &&
- xfrm_sec_ctx_match(pol->security, x->security))
+ security_xfrm_state_pol_flow_match(x, pol, fl))
error = -ESRCH;
}
}
@@ -389,8 +561,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
x = best;
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
- (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
- tmpl->id.proto)) != NULL) {
+ (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
+ tmpl->id.proto, family)) != NULL) {
xfrm_state_put(x0);
error = -EEXIST;
goto out;
@@ -404,17 +576,24 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
* to current session. */
xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+ error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+ if (error) {
+ x->km.state = XFRM_STATE_DEAD;
+ xfrm_state_put(x);
+ x = NULL;
+ goto out;
+ }
+
if (km_query(x, tmpl, pol) == 0) {
x->km.state = XFRM_STATE_ACQ;
- list_add_tail(&x->bydst, xfrm_state_bydst+h);
- xfrm_state_hold(x);
+ hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+ h = xfrm_src_hash(saddr, family);
+ hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
if (x->id.spi) {
h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
- list_add(&x->byspi, xfrm_state_byspi+h);
- xfrm_state_hold(x);
+ hlist_add_head(&x->byspi, xfrm_state_byspi+h);
}
x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
- xfrm_state_hold(x);
x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
add_timer(&x->timer);
} else {
@@ -430,59 +609,167 @@ out:
else
*err = acquire_in_progress ? -EAGAIN : error;
spin_unlock_bh(&xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
return x;
}
static void __xfrm_state_insert(struct xfrm_state *x)
{
- unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+ unsigned int h;
- list_add(&x->bydst, xfrm_state_bydst+h);
- xfrm_state_hold(x);
+ x->genid = ++xfrm_state_genid;
- h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+ h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+ x->props.reqid, x->props.family);
+ hlist_add_head(&x->bydst, xfrm_state_bydst+h);
- list_add(&x->byspi, xfrm_state_byspi+h);
- xfrm_state_hold(x);
+ h = xfrm_src_hash(&x->props.saddr, x->props.family);
+ hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
- if (!mod_timer(&x->timer, jiffies + HZ))
- xfrm_state_hold(x);
+ if (x->id.spi) {
+ h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+ x->props.family);
- if (x->replay_maxage &&
- !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
- xfrm_state_hold(x);
+ hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+ }
+
+ mod_timer(&x->timer, jiffies + HZ);
+ if (x->replay_maxage)
+ mod_timer(&x->rtimer, jiffies + x->replay_maxage);
wake_up(&km_waitq);
+
+ xfrm_state_num++;
+
+ if (x->bydst.next != NULL &&
+ (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
+ xfrm_state_num > xfrm_state_hmask)
+ schedule_work(&xfrm_hash_work);
+}
+
+/* xfrm_state_lock is held */
+static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+{
+ unsigned short family = xnew->props.family;
+ u32 reqid = xnew->props.reqid;
+ struct xfrm_state *x;
+ struct hlist_node *entry;
+ unsigned int h;
+
+ h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+ if (x->props.family == family &&
+ x->props.reqid == reqid &&
+ !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
+ !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+ x->genid = xfrm_state_genid;
+ }
}
void xfrm_state_insert(struct xfrm_state *x)
{
spin_lock_bh(&xfrm_state_lock);
+ __xfrm_state_bump_genids(x);
__xfrm_state_insert(x);
spin_unlock_bh(&xfrm_state_lock);
-
- xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
+/* xfrm_state_lock is held */
+static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+{
+ unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+ struct hlist_node *entry;
+ struct xfrm_state *x;
+
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+ if (x->props.reqid != reqid ||
+ x->props.mode != mode ||
+ x->props.family != family ||
+ x->km.state != XFRM_STATE_ACQ ||
+ x->id.spi != 0)
+ continue;
+
+ switch (family) {
+ case AF_INET:
+ if (x->id.daddr.a4 != daddr->a4 ||
+ x->props.saddr.a4 != saddr->a4)
+ continue;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
+ (struct in6_addr *)daddr) ||
+ !ipv6_addr_equal((struct in6_addr *)
+ x->props.saddr.a6,
+ (struct in6_addr *)saddr))
+ continue;
+ break;
+ };
+
+ xfrm_state_hold(x);
+ return x;
+ }
+
+ if (!create)
+ return NULL;
+
+ x = xfrm_state_alloc();
+ if (likely(x)) {
+ switch (family) {
+ case AF_INET:
+ x->sel.daddr.a4 = daddr->a4;
+ x->sel.saddr.a4 = saddr->a4;
+ x->sel.prefixlen_d = 32;
+ x->sel.prefixlen_s = 32;
+ x->props.saddr.a4 = saddr->a4;
+ x->id.daddr.a4 = daddr->a4;
+ break;
+
+ case AF_INET6:
+ ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
+ (struct in6_addr *)daddr);
+ ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
+ (struct in6_addr *)saddr);
+ x->sel.prefixlen_d = 128;
+ x->sel.prefixlen_s = 128;
+ ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
+ (struct in6_addr *)saddr);
+ ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
+ (struct in6_addr *)daddr);
+ break;
+ };
+
+ x->km.state = XFRM_STATE_ACQ;
+ x->id.proto = proto;
+ x->props.family = family;
+ x->props.mode = mode;
+ x->props.reqid = reqid;
+ x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+ xfrm_state_hold(x);
+ x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+ add_timer(&x->timer);
+ hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+ h = xfrm_src_hash(saddr, family);
+ hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+ wake_up(&km_waitq);
+ }
+
+ return x;
+}
+
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
int xfrm_state_add(struct xfrm_state *x)
{
- struct xfrm_state_afinfo *afinfo;
struct xfrm_state *x1;
int family;
int err;
+ int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
family = x->props.family;
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
spin_lock_bh(&xfrm_state_lock);
- x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+ x1 = __xfrm_state_locate(x, use_spi, family);
if (x1) {
xfrm_state_put(x1);
x1 = NULL;
@@ -490,7 +777,7 @@ int xfrm_state_add(struct xfrm_state *x)
goto out;
}
- if (x->km.seq) {
+ if (use_spi && x->km.seq) {
x1 = __xfrm_find_acq_byseq(x->km.seq);
if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
xfrm_state_put(x1);
@@ -498,20 +785,17 @@ int xfrm_state_add(struct xfrm_state *x)
}
}
- if (!x1)
- x1 = afinfo->find_acq(
- x->props.mode, x->props.reqid, x->id.proto,
- &x->id.daddr, &x->props.saddr, 0);
+ if (use_spi && !x1)
+ x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
+ x->id.proto,
+ &x->id.daddr, &x->props.saddr, 0);
+ __xfrm_state_bump_genids(x);
__xfrm_state_insert(x);
err = 0;
out:
spin_unlock_bh(&xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
-
- if (!err)
- xfrm_flush_all_bundles();
if (x1) {
xfrm_state_delete(x1);
@@ -524,16 +808,12 @@ EXPORT_SYMBOL(xfrm_state_add);
int xfrm_state_update(struct xfrm_state *x)
{
- struct xfrm_state_afinfo *afinfo;
struct xfrm_state *x1;
int err;
-
- afinfo = xfrm_state_get_afinfo(x->props.family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
+ int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
spin_lock_bh(&xfrm_state_lock);
- x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+ x1 = __xfrm_state_locate(x, use_spi, x->props.family);
err = -ESRCH;
if (!x1)
@@ -553,7 +833,6 @@ int xfrm_state_update(struct xfrm_state *x)
out:
spin_unlock_bh(&xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
if (err)
return err;
@@ -569,11 +848,15 @@ out:
if (likely(x1->km.state == XFRM_STATE_VALID)) {
if (x->encap && x1->encap)
memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+ if (x->coaddr && x1->coaddr) {
+ memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
+ }
+ if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
+ memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
x1->km.dying = 0;
- if (!mod_timer(&x1->timer, jiffies + HZ))
- xfrm_state_hold(x1);
+ mod_timer(&x1->timer, jiffies + HZ);
if (x1->curlft.use_time)
xfrm_state_check_expire(x1);
@@ -598,8 +881,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
- if (!mod_timer(&x->timer, jiffies))
- xfrm_state_hold(x);
+ mod_timer(&x->timer, jiffies);
return -EINVAL;
}
@@ -637,50 +919,97 @@ err:
EXPORT_SYMBOL(xfrm_state_check);
struct xfrm_state *
-xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
unsigned short family)
{
struct xfrm_state *x;
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- if (!afinfo)
- return NULL;
spin_lock_bh(&xfrm_state_lock);
- x = afinfo->state_lookup(daddr, spi, proto);
+ x = __xfrm_state_lookup(daddr, spi, proto, family);
spin_unlock_bh(&xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
struct xfrm_state *
+xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+ u8 proto, unsigned short family)
+{
+ struct xfrm_state *x;
+
+ spin_lock_bh(&xfrm_state_lock);
+ x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
+ spin_unlock_bh(&xfrm_state_lock);
+ return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+
+struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
xfrm_address_t *daddr, xfrm_address_t *saddr,
int create, unsigned short family)
{
struct xfrm_state *x;
+
+ spin_lock_bh(&xfrm_state_lock);
+ x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
+ spin_unlock_bh(&xfrm_state_lock);
+
+ return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq);
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+int
+xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+ unsigned short family)
+{
+ int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
if (!afinfo)
- return NULL;
+ return -EAFNOSUPPORT;
spin_lock_bh(&xfrm_state_lock);
- x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+ if (afinfo->tmpl_sort)
+ err = afinfo->tmpl_sort(dst, src, n);
spin_unlock_bh(&xfrm_state_lock);
xfrm_state_put_afinfo(afinfo);
- return x;
+ return err;
}
-EXPORT_SYMBOL(xfrm_find_acq);
+EXPORT_SYMBOL(xfrm_tmpl_sort);
+
+int
+xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+ unsigned short family)
+{
+ int err = 0;
+ struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ if (!afinfo)
+ return -EAFNOSUPPORT;
+
+ spin_lock_bh(&xfrm_state_lock);
+ if (afinfo->state_sort)
+ err = afinfo->state_sort(dst, src, n);
+ spin_unlock_bh(&xfrm_state_lock);
+ xfrm_state_put_afinfo(afinfo);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_state_sort);
+#endif
/* Silly enough, but I'm lazy to build resolution list */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
int i;
- struct xfrm_state *x;
- for (i = 0; i < XFRM_DST_HSIZE; i++) {
- list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
- if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
+ for (i = 0; i <= xfrm_state_hmask; i++) {
+ struct hlist_node *entry;
+ struct xfrm_state *x;
+
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+ if (x->km.seq == seq &&
+ x->km.state == XFRM_STATE_ACQ) {
xfrm_state_hold(x);
return x;
}
@@ -714,9 +1043,9 @@ u32 xfrm_get_acqseq(void)
EXPORT_SYMBOL(xfrm_get_acqseq);
void
-xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
- u32 h;
+ unsigned int h;
struct xfrm_state *x0;
if (x->id.spi)
@@ -731,10 +1060,10 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
x->id.spi = minspi;
} else {
u32 spi = 0;
- minspi = ntohl(minspi);
- maxspi = ntohl(maxspi);
- for (h=0; h<maxspi-minspi+1; h++) {
- spi = minspi + net_random()%(maxspi-minspi+1);
+ u32 low = ntohl(minspi);
+ u32 high = ntohl(maxspi);
+ for (h=0; h<high-low+1; h++) {
+ spi = low + net_random()%(high-low+1);
x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
x->id.spi = htonl(spi);
@@ -746,8 +1075,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
if (x->id.spi) {
spin_lock_bh(&xfrm_state_lock);
h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- list_add(&x->byspi, xfrm_state_byspi+h);
- xfrm_state_hold(x);
+ hlist_add_head(&x->byspi, xfrm_state_byspi+h);
spin_unlock_bh(&xfrm_state_lock);
wake_up(&km_waitq);
}
@@ -759,13 +1087,14 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
{
int i;
struct xfrm_state *x;
+ struct hlist_node *entry;
int count = 0;
int err = 0;
spin_lock_bh(&xfrm_state_lock);
- for (i = 0; i < XFRM_DST_HSIZE; i++) {
- list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
- if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+ for (i = 0; i <= xfrm_state_hmask; i++) {
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+ if (xfrm_id_proto_match(x->id.proto, proto))
count++;
}
}
@@ -774,9 +1103,9 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
goto out;
}
- for (i = 0; i < XFRM_DST_HSIZE; i++) {
- list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
- if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+ for (i = 0; i <= xfrm_state_hmask; i++) {
+ hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+ if (!xfrm_id_proto_match(x->id.proto, proto))
continue;
err = func(x, --count, data);
if (err)
@@ -833,10 +1162,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event)
km_state_notify(x, &c);
if (x->replay_maxage &&
- !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
- xfrm_state_hold(x);
+ !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
x->xflags &= ~XFRM_TIME_DEFER;
- }
}
EXPORT_SYMBOL(xfrm_replay_notify);
@@ -854,14 +1181,12 @@ static void xfrm_replay_timer_handler(unsigned long data)
}
spin_unlock(&x->lock);
- xfrm_state_put(x);
}
-int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
u32 diff;
-
- seq = ntohl(seq);
+ u32 seq = ntohl(net_seq);
if (unlikely(seq == 0))
return -EINVAL;
@@ -883,11 +1208,10 @@ int xfrm_replay_check(struct xfrm_state *x, u32 seq)
}
EXPORT_SYMBOL(xfrm_replay_check);
-void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
u32 diff;
-
- seq = ntohl(seq);
+ u32 seq = ntohl(net_seq);
if (seq > x->replay.seq) {
diff = seq - x->replay.seq;
@@ -998,6 +1322,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
}
EXPORT_SYMBOL(km_policy_expired);
+int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+ int err = -EINVAL;
+ int ret;
+ struct xfrm_mgr *km;
+
+ read_lock(&xfrm_km_lock);
+ list_for_each_entry(km, &xfrm_km_list, list) {
+ if (km->report) {
+ ret = km->report(proto, sel, addr);
+ if (!ret)
+ err = ret;
+ }
+ }
+ read_unlock(&xfrm_km_lock);
+ return err;
+}
+EXPORT_SYMBOL(km_report);
+
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
int err;
@@ -1019,7 +1362,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
err = -EINVAL;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
- pol = km->compile_policy(sk->sk_family, optname, data,
+ pol = km->compile_policy(sk, optname, data,
optlen, &err);
if (err >= 0)
break;
@@ -1066,11 +1409,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
write_lock_bh(&xfrm_state_afinfo_lock);
if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
err = -ENOBUFS;
- else {
- afinfo->state_bydst = xfrm_state_bydst;
- afinfo->state_byspi = xfrm_state_byspi;
+ else
xfrm_state_afinfo[afinfo->family] = afinfo;
- }
write_unlock_bh(&xfrm_state_afinfo_lock);
return err;
}
@@ -1087,11 +1427,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
err = -EINVAL;
- else {
+ else
xfrm_state_afinfo[afinfo->family] = NULL;
- afinfo->state_byspi = NULL;
- afinfo->state_bydst = NULL;
- }
}
write_unlock_bh(&xfrm_state_afinfo_lock);
return err;
@@ -1164,8 +1501,6 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
return res;
}
-EXPORT_SYMBOL(xfrm_state_mtu);
-
int xfrm_init_state(struct xfrm_state *x)
{
struct xfrm_state_afinfo *afinfo;
@@ -1209,12 +1544,17 @@ EXPORT_SYMBOL(xfrm_init_state);
void __init xfrm_state_init(void)
{
- int i;
+ unsigned int sz;
+
+ sz = sizeof(struct hlist_head) * 8;
+
+ xfrm_state_bydst = xfrm_hash_alloc(sz);
+ xfrm_state_bysrc = xfrm_hash_alloc(sz);
+ xfrm_state_byspi = xfrm_hash_alloc(sz);
+ if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
+ panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
+ xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
- for (i=0; i<XFRM_DST_HSIZE; i++) {
- INIT_LIST_HEAD(&xfrm_state_bydst[i]);
- INIT_LIST_HEAD(&xfrm_state_byspi[i]);
- }
INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c21dc26141ea..c59a78d2923a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -10,6 +10,7 @@
*
*/
+#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -27,6 +28,9 @@
#include <net/xfrm.h>
#include <net/netlink.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#endif
static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
{
@@ -86,6 +90,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
return 0;
}
+static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type,
+ xfrm_address_t **addrp)
+{
+ struct rtattr *rt = xfrma[type - 1];
+
+ if (!rt)
+ return 0;
+
+ if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp))
+ return -EINVAL;
+
+ if (addrp)
+ *addrp = RTA_DATA(rt);
+
+ return 0;
+}
static inline int verify_sec_ctx_len(struct rtattr **xfrma)
{
@@ -156,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ if (xfrma[XFRMA_ALG_COMP-1] ||
+ xfrma[XFRMA_ALG_AUTH-1] ||
+ xfrma[XFRMA_ALG_CRYPT-1] ||
+ xfrma[XFRMA_ENCAP-1] ||
+ xfrma[XFRMA_SEC_CTX-1] ||
+ !xfrma[XFRMA_COADDR-1])
+ goto out;
+ break;
+#endif
+
default:
goto out;
};
@@ -170,11 +203,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
if ((err = verify_sec_ctx_len(xfrma)))
goto out;
+ if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL)))
+ goto out;
err = -EINVAL;
switch (p->mode) {
- case 0:
- case 1:
+ case XFRM_MODE_TRANSPORT:
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_ROUTEOPTIMIZATION:
break;
default:
@@ -212,6 +248,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
return -ENOMEM;
memcpy(p, ualg, len);
+ strcpy(p->alg_name, algo->name);
*algpp = p;
return 0;
}
@@ -258,6 +295,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
return security_xfrm_state_alloc(x, uctx);
}
+static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg)
+{
+ struct rtattr *rta = u_arg;
+ xfrm_address_t *p, *uaddrp;
+
+ if (!rta)
+ return 0;
+
+ uaddrp = RTA_DATA(rta);
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ memcpy(p, uaddrp, sizeof(*p));
+ *addrpp = p;
+ return 0;
+}
+
static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
{
memcpy(&x->id, &p->id, sizeof(x->id));
@@ -347,7 +402,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
goto error;
if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
goto error;
-
+ if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1])))
+ goto error;
err = xfrm_init_state(x);
if (err)
goto error;
@@ -416,16 +472,48 @@ out:
return err;
}
+static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
+ struct rtattr **xfrma,
+ int *errp)
+{
+ struct xfrm_state *x = NULL;
+ int err;
+
+ if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
+ err = -ESRCH;
+ x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+ } else {
+ xfrm_address_t *saddr = NULL;
+
+ err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr);
+ if (err)
+ goto out;
+
+ if (!saddr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
+ p->family);
+ }
+
+ out:
+ if (!x && errp)
+ *errp = err;
+ return x;
+}
+
static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
{
struct xfrm_state *x;
- int err;
+ int err = -ESRCH;
struct km_event c;
struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
- x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+ x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
if (x == NULL)
- return -ESRCH;
+ return err;
if ((err = security_xfrm_state_delete(x)) != 0)
goto out;
@@ -519,6 +607,13 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
uctx->ctx_len = x->security->ctx_len;
memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
}
+
+ if (x->coaddr)
+ RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
+
+ if (x->lastused)
+ RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
+
nlh->nlmsg_len = skb->tail - b;
out:
sp->this_idx++;
@@ -540,7 +635,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
info.nlmsg_flags = NLM_F_MULTI;
info.this_idx = 0;
info.start_idx = cb->args[0];
- (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+ (void) xfrm_state_walk(0, dump_one_state, &info);
cb->args[0] = info.this_idx;
return skb->len;
@@ -576,10 +671,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
struct xfrm_state *x;
struct sk_buff *resp_skb;
- int err;
+ int err = -ESRCH;
- x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
- err = -ESRCH;
+ x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
if (x == NULL)
goto out_noput;
@@ -692,6 +786,22 @@ static int verify_policy_dir(__u8 dir)
return 0;
}
+static int verify_policy_type(__u8 type)
+{
+ switch (type) {
+ case XFRM_POLICY_TYPE_MAIN:
+#ifdef CONFIG_XFRM_SUB_POLICY
+ case XFRM_POLICY_TYPE_SUB:
+#endif
+ break;
+
+ default:
+ return -EINVAL;
+ };
+
+ return 0;
+}
+
static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
{
switch (p->share) {
@@ -785,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
return 0;
}
+static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma)
+{
+ struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1];
+ struct xfrm_userpolicy_type *upt;
+ __u8 type = XFRM_POLICY_TYPE_MAIN;
+ int err;
+
+ if (rt) {
+ if (rt->rta_len < sizeof(*upt))
+ return -EINVAL;
+
+ upt = RTA_DATA(rt);
+ type = upt->type;
+ }
+
+ err = verify_policy_type(type);
+ if (err)
+ return err;
+
+ *tp = type;
+ return 0;
+}
+
static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
{
xp->priority = p->priority;
@@ -823,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
copy_from_user_policy(xp, p);
+ err = copy_from_user_policy_type(&xp->type, xfrma);
+ if (err)
+ goto error;
+
if (!(err = copy_from_user_tmpl(xp, xfrma)))
err = copy_from_user_sec_ctx(xp, xfrma);
-
- if (err) {
- *errp = err;
- kfree(xp);
- xp = NULL;
- }
+ if (err)
+ goto error;
return xp;
+ error:
+ *errp = err;
+ kfree(xp);
+ return NULL;
}
static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
@@ -909,27 +1046,63 @@ rtattr_failure:
return -1;
}
-static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
{
- if (xp->security) {
- int ctx_size = sizeof(struct xfrm_sec_ctx) +
- xp->security->ctx_len;
- struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
- struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+ int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len;
+ struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+ struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+ uctx->exttype = XFRMA_SEC_CTX;
+ uctx->len = ctx_size;
+ uctx->ctx_doi = s->ctx_doi;
+ uctx->ctx_alg = s->ctx_alg;
+ uctx->ctx_len = s->ctx_len;
+ memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+ return 0;
- uctx->exttype = XFRMA_SEC_CTX;
- uctx->len = ctx_size;
- uctx->ctx_doi = xp->security->ctx_doi;
- uctx->ctx_alg = xp->security->ctx_alg;
- uctx->ctx_len = xp->security->ctx_len;
- memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
+ rtattr_failure:
+ return -1;
+}
+
+static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb)
+{
+ if (x->security) {
+ return copy_sec_ctx(x->security, skb);
}
return 0;
+}
- rtattr_failure:
+static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+ if (xp->security) {
+ return copy_sec_ctx(xp->security, skb);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+ struct xfrm_userpolicy_type upt;
+
+ memset(&upt, 0, sizeof(upt));
+ upt.type = xp->type;
+
+ RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+
+ return 0;
+
+rtattr_failure:
return -1;
}
+#else
+static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+ return 0;
+}
+#endif
+
static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
{
struct xfrm_dump_info *sp = ptr;
@@ -953,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
+ if (copy_to_user_policy_type(xp, skb) < 0)
+ goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
out:
@@ -974,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
info.nlmsg_flags = NLM_F_MULTI;
info.this_idx = 0;
info.start_idx = cb->args[0];
- (void) xfrm_policy_walk(dump_one_policy, &info);
+ (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info);
+#ifdef CONFIG_XFRM_SUB_POLICY
+ (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info);
+#endif
cb->args[0] = info.this_idx;
return skb->len;
@@ -1010,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
{
struct xfrm_policy *xp;
struct xfrm_userpolicy_id *p;
+ __u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
struct km_event c;
int delete;
@@ -1017,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
p = NLMSG_DATA(nlh);
delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
+ err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ if (err)
+ return err;
+
err = verify_policy_dir(p->dir);
if (err)
return err;
if (p->index)
- xp = xfrm_policy_byid(p->dir, p->index, delete);
+ xp = xfrm_policy_byid(type, p->dir, p->index, delete);
else {
struct rtattr **rtattrs = (struct rtattr **)xfrma;
struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1039,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
return err;
}
- xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+ xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
security_xfrm_policy_free(&tmp);
}
if (xp == NULL)
@@ -1222,9 +1405,16 @@ out:
static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
{
-struct km_event c;
+ struct km_event c;
+ __u8 type = XFRM_POLICY_TYPE_MAIN;
+ int err;
+
+ err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ if (err)
+ return err;
- xfrm_policy_flush();
+ xfrm_policy_flush(type);
+ c.data.type = type;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
c.pid = nlh->nlmsg_pid;
@@ -1237,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
struct xfrm_policy *xp;
struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
struct xfrm_userpolicy_info *p = &up->pol;
+ __u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
+ err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ if (err)
+ return err;
+
if (p->index)
- xp = xfrm_policy_byid(p->dir, p->index, 0);
+ xp = xfrm_policy_byid(type, p->dir, p->index, 0);
else {
struct rtattr **rtattrs = (struct rtattr **)xfrma;
struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1257,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
return err;
}
- xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0);
+ xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0);
security_xfrm_policy_free(&tmp);
}
@@ -1384,6 +1579,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0),
[XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+ [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
};
#undef XMSGSIZE
@@ -1435,7 +1631,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (security_netlink_recv(skb)) {
+ if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
*errp = -EPERM;
return -1;
}
@@ -1708,7 +1904,9 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
- if (copy_to_user_sec_ctx(xp, skb))
+ if (copy_to_user_state_sec_ctx(x, skb))
+ goto nlmsg_failure;
+ if (copy_to_user_policy_type(xp, skb) < 0)
goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -1742,7 +1940,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
/* User gives us xfrm_user_policy_info followed by an array of 0
* or more templates.
*/
-static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
u8 *data, int len, int *dir)
{
struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
@@ -1750,7 +1948,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
struct xfrm_policy *xp;
int nr;
- switch (family) {
+ switch (sk->sk_family) {
case AF_INET:
if (opt != IP_XFRM_POLICY) {
*dir = -EOPNOTSUPP;
@@ -1790,8 +1988,18 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
}
copy_from_user_policy(xp, p);
+ xp->type = XFRM_POLICY_TYPE_MAIN;
copy_templates(xp, ut, nr);
+ if (!xp->security) {
+ int err = security_xfrm_sock_policy_alloc(xp, sk);
+ if (err) {
+ kfree(xp);
+ *dir = err;
+ return NULL;
+ }
+ }
+
*dir = p->dir;
return xp;
@@ -1814,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
+ if (copy_to_user_policy_type(xp, skb) < 0)
+ goto nlmsg_failure;
upe->hard = !!hard;
nlh->nlmsg_len = skb->tail - b;
@@ -1885,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
copy_to_user_policy(xp, p, dir);
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
+ if (copy_to_user_policy_type(xp, skb) < 0)
+ goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -1902,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned char *b;
+#ifdef CONFIG_XFRM_SUB_POLICY
+ struct xfrm_userpolicy_type upt;
+#endif
int len = NLMSG_LENGTH(0);
skb = alloc_skb(len, GFP_ATOMIC);
@@ -1911,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c)
nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
+ nlh->nlmsg_flags = 0;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+ memset(&upt, 0, sizeof(upt));
+ upt.type = c->data.type;
+ RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+#endif
nlh->nlmsg_len = skb->tail - b;
@@ -1918,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
nlmsg_failure:
+#ifdef CONFIG_XFRM_SUB_POLICY
+rtattr_failure:
+#endif
kfree_skb(skb);
return -1;
}
@@ -1942,19 +2167,64 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev
}
+static int build_report(struct sk_buff *skb, u8 proto,
+ struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+ struct xfrm_user_report *ur;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
+ ur = NLMSG_DATA(nlh);
+ nlh->nlmsg_flags = 0;
+
+ ur->proto = proto;
+ memcpy(&ur->sel, sel, sizeof(ur->sel));
+
+ if (addr)
+ RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
+
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
+ xfrm_address_t *addr)
+{
+ struct sk_buff *skb;
+ size_t len;
+
+ len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report)));
+ skb = alloc_skb(len, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ if (build_report(skb, proto, sel, addr) < 0)
+ BUG();
+
+ NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT;
+ return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+}
+
static struct xfrm_mgr netlink_mgr = {
.id = "netlink",
.notify = xfrm_send_state_notify,
.acquire = xfrm_send_acquire,
.compile_policy = xfrm_compile_policy,
.notify_policy = xfrm_send_policy_notify,
+ .report = xfrm_send_report,
};
static int __init xfrm_user_init(void)
{
struct sock *nlsk;
- printk(KERN_INFO "Initializing IPsec netlink socket\n");
+ printk(KERN_INFO "Initializing XFRM netlink socket\n");
nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
xfrm_netlink_rcv, THIS_MODULE);
OpenPOWER on IntegriCloud