From c2f0c7c356dc9ae15419f00c725a2fcc58eeff58 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 6 May 2005 12:38:39 +0100 Subject: The attached patch addresses the problem with getting the audit daemon shutdown credential information. It creates a new message type AUDIT_TERM_INFO, which is used by the audit daemon to query who issued the shutdown. It requires the placement of a hook function that gathers the information. The hook is after the DAC & MAC checks and before the function returns. Racing threads could overwrite the uid & pid - but they would have to be root and have policy that allows signalling the audit daemon. That should be a manageable risk. The userspace component will be released later in audit 0.7.2. When it receives the TERM signal, it queries the kernel for shutdown information. When it receives it, it writes the message and exits. The message looks like this: type=DAEMON msg=auditd(1114551182.000) auditd normal halt, sending pid=2650 uid=525, auditd pid=1685 Signed-off-by: Steve Grubb Signed-off-by: David Woodhouse --- include/linux/audit.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 19f04b049798..baa80760824c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -28,14 +28,16 @@ #include /* Request and reply types */ -#define AUDIT_GET 1000 /* Get status */ -#define AUDIT_SET 1001 /* Set status (enable/disable/auditd) */ -#define AUDIT_LIST 1002 /* List filtering rules */ -#define AUDIT_ADD 1003 /* Add filtering rule */ -#define AUDIT_DEL 1004 /* Delete filtering rule */ -#define AUDIT_USER 1005 /* Send a message from user-space */ -#define AUDIT_LOGIN 1006 /* Define the login id and informaiton */ -#define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ +#define AUDIT_GET 1000 /* Get status */ +#define AUDIT_SET 1001 /* Set status (enable/disable/auditd) */ +#define AUDIT_LIST 1002 /* List filtering rules */ +#define AUDIT_ADD 1003 /* Add filtering rule */ +#define AUDIT_DEL 1004 /* Delete filtering rule */ +#define AUDIT_USER 1005 /* Send a message from user-space */ +#define AUDIT_LOGIN 1006 /* Define the login id and information */ +#define AUDIT_SIGNAL_INFO 1010 /* Get information about sender of signal*/ + +#define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ /* Rule flags */ #define AUDIT_PER_TASK 0x01 /* Apply rule at task creation (not syscall) */ @@ -161,6 +163,11 @@ struct audit_rule { /* for AUDIT_LIST, AUDIT_ADD, and AUDIT_DEL */ #ifdef __KERNEL__ +struct audit_sig_info { + uid_t uid; + pid_t pid; +}; + struct audit_buffer; struct audit_context; struct inode; @@ -190,6 +197,7 @@ extern void audit_get_stamp(struct audit_context *ctx, extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); +extern void audit_signal_info(int sig, struct task_struct *t); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) @@ -200,6 +208,7 @@ extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mo #define audit_inode(n,i) do { ; } while (0) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_perms(q,u,g,m) ({ 0; }) +#define audit_signal_info(s,t) do { ; } while (0) #endif #ifdef CONFIG_AUDIT -- cgit v1.2.1 From 197c69c6afd2deb7eec44040ff533d90d26c6161 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 11 May 2005 10:54:05 +0100 Subject: Move ifdef CONFIG_AUDITSYSCALL to header Remove code conditionally dependent on CONFIG_AUDITSYSCALL from audit.c. Move these dependencies to audit.h with the rest. Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- include/linux/audit.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index baa80760824c..58c5589b531f 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -192,7 +192,7 @@ extern void audit_inode(const char *name, const struct inode *inode); /* Private API (for audit.c only) */ extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, uid_t loginuid); -extern void audit_get_stamp(struct audit_context *ctx, +extern int audit_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); @@ -206,6 +206,8 @@ extern void audit_signal_info(int sig, struct task_struct *t); #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) #define audit_inode(n,i) do { ; } while (0) +#define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) +#define audit_get_stamp(c,t,s) ({ 0; }) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_perms(q,u,g,m) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) -- cgit v1.2.1 From c1b773d87eadc3972d697444127e89a7291769a2 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 11 May 2005 10:55:10 +0100 Subject: Add audit_log_type Add audit_log_type to allow callers to specify type and pid when logging. Convert audit_log to wrapper around audit_log_type. Could have converted all audit_log callers directly, but common case is default of type AUDIT_KERNEL and pid 0. Update audit_log_start to take type and pid values when creating a new audit_buffer. Move sequences that did audit_log_start, audit_log_format, audit_set_type, audit_log_end, to simply call audit_log_type directly. This obsoletes audit_set_type and audit_set_pid, so remove them. Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- include/linux/audit.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 58c5589b531f..405332ebf3c6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -216,11 +216,14 @@ extern void audit_signal_info(int sig, struct task_struct *t); #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ -extern void audit_log(struct audit_context *ctx, - const char *fmt, ...) - __attribute__((format(printf,2,3))); +#define audit_log(ctx, fmt, args...) \ + audit_log_type(ctx, AUDIT_KERNEL, 0, fmt, ##args) +extern void audit_log_type(struct audit_context *ctx, int type, + int pid, const char *fmt, ...) + __attribute__((format(printf,4,5))); -extern struct audit_buffer *audit_log_start(struct audit_context *ctx); +extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int type, + int pid); extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) __attribute__((format(printf,2,3))); @@ -240,8 +243,9 @@ extern void audit_send_reply(int pid, int seq, int type, void *payload, int size); extern void audit_log_lost(const char *message); #else -#define audit_log(t,f,...) do { ; } while (0) -#define audit_log_start(t) ({ NULL; }) +#define audit_log(c,f,...) do { ; } while (0) +#define audit_log_type(c,t,p,f,...) do { ; } while (0) +#define audit_log_start(c,t,p) ({ NULL; }) #define audit_log_vformat(b,f,a) do { ; } while (0) #define audit_log_format(b,f,...) do { ; } while (0) #define audit_log_end(b) do { ; } while (0) -- cgit v1.2.1 From 6f2f38128170814e151cfedf79532e19cd179567 Mon Sep 17 00:00:00 2001 From: Brad Campbell Date: Thu, 12 May 2005 15:07:47 -0400 Subject: [PATCH] libata basic detection and errata for PATA->SATA bridges This patch works around an issue with WD drives (and possibly others) over SiL PATA->SATA Bridges on SATA controllers locking up with transfers > 200 sectors. Signed-off-by: Brad Campbell --- include/linux/ata.h | 1 + include/linux/libata.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index f178894edd04..ca5fcadf9981 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -224,6 +224,7 @@ struct ata_taskfile { }; #define ata_id_is_ata(id) (((id)[0] & (1 << 15)) == 0) +#define ata_id_is_sata(id) ((id)[93] == 0) #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6)) #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5)) #define ata_id_has_flush(id) ((id)[83] & (1 << 12)) diff --git a/include/linux/libata.h b/include/linux/libata.h index 505160ab472b..d33e70361a7d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -420,6 +420,7 @@ extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, extern unsigned int ata_dev_classify(struct ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); +extern void ata_dev_config(struct ata_port *ap, unsigned int i); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_port *ap); -- cgit v1.2.1 From 7d17c1d606f6e89778f05554ddea43791d5c92a0 Mon Sep 17 00:00:00 2001 From: Date: Thu, 12 May 2005 19:45:25 -0400 Subject: [netdrvrs] Use netif_carrier_* instead of IFF_RUNNING --- include/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index d73a9d62f208..ce627d9092ef 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -33,7 +33,7 @@ #define IFF_LOOPBACK 0x8 /* is a loopback net */ #define IFF_POINTOPOINT 0x10 /* interface is has p-p link */ #define IFF_NOTRAILERS 0x20 /* avoid use of trailers */ -#define IFF_RUNNING 0x40 /* resources allocated */ +#define IFF_RUNNING 0x40 /* interface running and carrier ok */ #define IFF_NOARP 0x80 /* no ARP protocol */ #define IFF_PROMISC 0x100 /* receive all packets */ #define IFF_ALLMULTI 0x200 /* receive all multicast packets*/ -- cgit v1.2.1 From fff9cfd99c0f88645c3f50d7476d6c8cef99f140 Mon Sep 17 00:00:00 2001 From: Date: Thu, 12 May 2005 20:24:19 -0400 Subject: [PATCH] Wireless Extensions 18 (aka WPA) This is version 18 of the Wireless Extensions. The main change is that it adds all the necessary APIs for WPA and WPA2 support. This work was entirely done by Jouni Malinen, so let's thank him for both his hard work and deep expertise on the subject ;-) This APIs obviously doesn't do much by itself and works in concert with driver support (Jouni already sent you the HostAP changes) and userspace (Jouni is updating wpa_supplicant). This is also orthogonal with the ongoing work on in-kernel IEEE support (but potentially useful). The patch is attached, tested with 2.6.11. Normally, I would ask you to push that directly in the kernel (99% of the patch has been on my web page for ages and it does not affect non-WPA stuff), but Jouni convinced me that it should bake a few weeks in wireless-2.6 first, so that other driver maintainers can get up to speed with it. Signed-off-by: Jeff Garzik --- include/linux/wireless.h | 283 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 280 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 2f51f2b6562e..ae485f9c916e 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1,10 +1,10 @@ /* * This file define a set of standard wireless extensions * - * Version : 17 21.6.04 + * Version : 18 12.3.05 * * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved. */ #ifndef _LINUX_WIRELESS_H @@ -82,7 +82,7 @@ * (there is some stuff that will be added in the future...) * I just plan to increment with each new version. */ -#define WIRELESS_EXT 17 +#define WIRELESS_EXT 18 /* * Changes : @@ -182,6 +182,21 @@ * - Document (struct iw_quality *)->updated, add new flags (INVALID) * - Wireless Event capability in struct iw_range * - Add support for relative TxPower (yick !) + * + * V17 to V18 (From Jouni Malinen ) + * ---------- + * - Add support for WPA/WPA2 + * - Add extended encoding configuration (SIOCSIWENCODEEXT and + * SIOCGIWENCODEEXT) + * - Add SIOCSIWGENIE/SIOCGIWGENIE + * - Add SIOCSIWMLME + * - Add SIOCSIWPMKSA + * - Add struct iw_range bit field for supported encoding capabilities + * - Add optional scan request parameters for SIOCSIWSCAN + * - Add SIOCSIWAUTH/SIOCGIWAUTH for setting authentication and WPA + * related parameters (extensible up to 4096 parameter values) + * - Add wireless events: IWEVGENIE, IWEVMICHAELMICFAILURE, + * IWEVASSOCREQIE, IWEVASSOCRESPIE, IWEVPMKIDCAND */ /**************************** CONSTANTS ****************************/ @@ -256,6 +271,30 @@ #define SIOCSIWPOWER 0x8B2C /* set Power Management settings */ #define SIOCGIWPOWER 0x8B2D /* get Power Management settings */ +/* WPA : Generic IEEE 802.11 informatiom element (e.g., for WPA/RSN/WMM). + * This ioctl uses struct iw_point and data buffer that includes IE id and len + * fields. More than one IE may be included in the request. Setting the generic + * IE to empty buffer (len=0) removes the generic IE from the driver. Drivers + * are allowed to generate their own WPA/RSN IEs, but in these cases, drivers + * are required to report the used IE as a wireless event, e.g., when + * associating with an AP. */ +#define SIOCSIWGENIE 0x8B30 /* set generic IE */ +#define SIOCGIWGENIE 0x8B31 /* get generic IE */ + +/* WPA : IEEE 802.11 MLME requests */ +#define SIOCSIWMLME 0x8B16 /* request MLME operation; uses + * struct iw_mlme */ +/* WPA : Authentication mode parameters */ +#define SIOCSIWAUTH 0x8B32 /* set authentication mode params */ +#define SIOCGIWAUTH 0x8B33 /* get authentication mode params */ + +/* WPA : Extended version of encoding configuration */ +#define SIOCSIWENCODEEXT 0x8B34 /* set encoding token & mode */ +#define SIOCGIWENCODEEXT 0x8B35 /* get encoding token & mode */ + +/* WPA2 : PMKSA cache management */ +#define SIOCSIWPMKSA 0x8B36 /* PMKSA cache operation */ + /* -------------------- DEV PRIVATE IOCTL LIST -------------------- */ /* These 32 ioctl are wireless device private, for 16 commands. @@ -297,6 +336,34 @@ #define IWEVCUSTOM 0x8C02 /* Driver specific ascii string */ #define IWEVREGISTERED 0x8C03 /* Discovered a new node (AP mode) */ #define IWEVEXPIRED 0x8C04 /* Expired a node (AP mode) */ +#define IWEVGENIE 0x8C05 /* Generic IE (WPA, RSN, WMM, ..) + * (scan results); This includes id and + * length fields. One IWEVGENIE may + * contain more than one IE. Scan + * results may contain one or more + * IWEVGENIE events. */ +#define IWEVMICHAELMICFAILURE 0x8C06 /* Michael MIC failure + * (struct iw_michaelmicfailure) + */ +#define IWEVASSOCREQIE 0x8C07 /* IEs used in (Re)Association Request. + * The data includes id and length + * fields and may contain more than one + * IE. This event is required in + * Managed mode if the driver + * generates its own WPA/RSN IE. This + * should be sent just before + * IWEVREGISTERED event for the + * association. */ +#define IWEVASSOCRESPIE 0x8C08 /* IEs used in (Re)Association + * Response. The data includes id and + * length fields and may contain more + * than one IE. This may be sent + * between IWEVASSOCREQIE and + * IWEVREGISTERED events for the + * association. */ +#define IWEVPMKIDCAND 0x8C09 /* PMKID candidate for RSN + * pre-authentication + * (struct iw_pmkid_cand) */ #define IWEVFIRST 0x8C00 @@ -432,12 +499,94 @@ #define IW_SCAN_THIS_MODE 0x0020 /* Scan only this Mode */ #define IW_SCAN_ALL_RATE 0x0040 /* Scan all Bit-Rates */ #define IW_SCAN_THIS_RATE 0x0080 /* Scan only this Bit-Rate */ +/* struct iw_scan_req scan_type */ +#define IW_SCAN_TYPE_ACTIVE 0 +#define IW_SCAN_TYPE_PASSIVE 1 /* Maximum size of returned data */ #define IW_SCAN_MAX_DATA 4096 /* In bytes */ /* Max number of char in custom event - use multiple of them if needed */ #define IW_CUSTOM_MAX 256 /* In bytes */ +/* Generic information element */ +#define IW_GENERIC_IE_MAX 1024 + +/* MLME requests (SIOCSIWMLME / struct iw_mlme) */ +#define IW_MLME_DEAUTH 0 +#define IW_MLME_DISASSOC 1 + +/* SIOCSIWAUTH/SIOCGIWAUTH struct iw_param flags */ +#define IW_AUTH_INDEX 0x0FFF +#define IW_AUTH_FLAGS 0xF000 +/* SIOCSIWAUTH/SIOCGIWAUTH parameters (0 .. 4095) + * (IW_AUTH_INDEX mask in struct iw_param flags; this is the index of the + * parameter that is being set/get to; value will be read/written to + * struct iw_param value field) */ +#define IW_AUTH_WPA_VERSION 0 +#define IW_AUTH_CIPHER_PAIRWISE 1 +#define IW_AUTH_CIPHER_GROUP 2 +#define IW_AUTH_KEY_MGMT 3 +#define IW_AUTH_TKIP_COUNTERMEASURES 4 +#define IW_AUTH_DROP_UNENCRYPTED 5 +#define IW_AUTH_80211_AUTH_ALG 6 +#define IW_AUTH_WPA_ENABLED 7 +#define IW_AUTH_RX_UNENCRYPTED_EAPOL 8 +#define IW_AUTH_ROAMING_CONTROL 9 +#define IW_AUTH_PRIVACY_INVOKED 10 + +/* IW_AUTH_WPA_VERSION values (bit field) */ +#define IW_AUTH_WPA_VERSION_DISABLED 0x00000001 +#define IW_AUTH_WPA_VERSION_WPA 0x00000002 +#define IW_AUTH_WPA_VERSION_WPA2 0x00000004 + +/* IW_AUTH_PAIRWISE_CIPHER and IW_AUTH_GROUP_CIPHER values (bit field) */ +#define IW_AUTH_CIPHER_NONE 0x00000001 +#define IW_AUTH_CIPHER_WEP40 0x00000002 +#define IW_AUTH_CIPHER_TKIP 0x00000004 +#define IW_AUTH_CIPHER_CCMP 0x00000008 +#define IW_AUTH_CIPHER_WEP104 0x00000010 + +/* IW_AUTH_KEY_MGMT values (bit field) */ +#define IW_AUTH_KEY_MGMT_802_1X 1 +#define IW_AUTH_KEY_MGMT_PSK 2 + +/* IW_AUTH_80211_AUTH_ALG values (bit field) */ +#define IW_AUTH_ALG_OPEN_SYSTEM 0x00000001 +#define IW_AUTH_ALG_SHARED_KEY 0x00000002 +#define IW_AUTH_ALG_LEAP 0x00000004 + +/* IW_AUTH_ROAMING_CONTROL values */ +#define IW_AUTH_ROAMING_ENABLE 0 /* driver/firmware based roaming */ +#define IW_AUTH_ROAMING_DISABLE 1 /* user space program used for roaming + * control */ + +/* SIOCSIWENCODEEXT definitions */ +#define IW_ENCODE_SEQ_MAX_SIZE 8 +/* struct iw_encode_ext ->alg */ +#define IW_ENCODE_ALG_NONE 0 +#define IW_ENCODE_ALG_WEP 1 +#define IW_ENCODE_ALG_TKIP 2 +#define IW_ENCODE_ALG_CCMP 3 +/* struct iw_encode_ext ->ext_flags */ +#define IW_ENCODE_EXT_TX_SEQ_VALID 0x00000001 +#define IW_ENCODE_EXT_RX_SEQ_VALID 0x00000002 +#define IW_ENCODE_EXT_GROUP_KEY 0x00000004 +#define IW_ENCODE_EXT_SET_TX_KEY 0x00000008 + +/* IWEVMICHAELMICFAILURE : struct iw_michaelmicfailure ->flags */ +#define IW_MICFAILURE_KEY_ID 0x00000003 /* Key ID 0..3 */ +#define IW_MICFAILURE_GROUP 0x00000004 +#define IW_MICFAILURE_PAIRWISE 0x00000008 +#define IW_MICFAILURE_STAKEY 0x00000010 +#define IW_MICFAILURE_COUNT 0x00000060 /* 1 or 2 (0 = count not supported) + */ + +/* Bit field values for enc_capa in struct iw_range */ +#define IW_ENC_CAPA_WPA 0x00000001 +#define IW_ENC_CAPA_WPA2 0x00000002 +#define IW_ENC_CAPA_CIPHER_TKIP 0x00000004 +#define IW_ENC_CAPA_CIPHER_CCMP 0x00000008 + /* Event capability macros - in (struct iw_range *)->event_capa * Because we have more than 32 possible events, we use an array of * 32 bit bitmasks. Note : 32 bits = 0x20 = 2^5. */ @@ -546,6 +695,132 @@ struct iw_thrspy struct iw_quality high; /* High threshold */ }; +/* + * Optional data for scan request + * + * Note: these optional parameters are controlling parameters for the + * scanning behavior, these do not apply to getting scan results + * (SIOCGIWSCAN). Drivers are expected to keep a local BSS table and + * provide a merged results with all BSSes even if the previous scan + * request limited scanning to a subset, e.g., by specifying an SSID. + * Especially, scan results are required to include an entry for the + * current BSS if the driver is in Managed mode and associated with an AP. + */ +struct iw_scan_req +{ + __u8 scan_type; /* IW_SCAN_TYPE_{ACTIVE,PASSIVE} */ + __u8 essid_len; + __u8 num_channels; /* num entries in channel_list; + * 0 = scan all allowed channels */ + __u8 flags; /* reserved as padding; use zero, this may + * be used in the future for adding flags + * to request different scan behavior */ + struct sockaddr bssid; /* ff:ff:ff:ff:ff:ff for broadcast BSSID or + * individual address of a specific BSS */ + + /* + * Use this ESSID if IW_SCAN_THIS_ESSID flag is used instead of using + * the current ESSID. This allows scan requests for specific ESSID + * without having to change the current ESSID and potentially breaking + * the current association. + */ + __u8 essid[IW_ESSID_MAX_SIZE]; + + /* + * Optional parameters for changing the default scanning behavior. + * These are based on the MLME-SCAN.request from IEEE Std 802.11. + * TU is 1.024 ms. If these are set to 0, driver is expected to use + * reasonable default values. min_channel_time defines the time that + * will be used to wait for the first reply on each channel. If no + * replies are received, next channel will be scanned after this. If + * replies are received, total time waited on the channel is defined by + * max_channel_time. + */ + __u32 min_channel_time; /* in TU */ + __u32 max_channel_time; /* in TU */ + + struct iw_freq channel_list[IW_MAX_FREQUENCIES]; +}; + +/* ------------------------- WPA SUPPORT ------------------------- */ + +/* + * Extended data structure for get/set encoding (this is used with + * SIOCSIWENCODEEXT/SIOCGIWENCODEEXT. struct iw_point and IW_ENCODE_* + * flags are used in the same way as with SIOCSIWENCODE/SIOCGIWENCODE and + * only the data contents changes (key data -> this structure, including + * key data). + * + * If the new key is the first group key, it will be set as the default + * TX key. Otherwise, default TX key index is only changed if + * IW_ENCODE_EXT_SET_TX_KEY flag is set. + * + * Key will be changed with SIOCSIWENCODEEXT in all cases except for + * special "change TX key index" operation which is indicated by setting + * key_len = 0 and ext_flags |= IW_ENCODE_EXT_SET_TX_KEY. + * + * tx_seq/rx_seq are only used when respective + * IW_ENCODE_EXT_{TX,RX}_SEQ_VALID flag is set in ext_flags. Normal + * TKIP/CCMP operation is to set RX seq with SIOCSIWENCODEEXT and start + * TX seq from zero whenever key is changed. SIOCGIWENCODEEXT is normally + * used only by an Authenticator (AP or an IBSS station) to get the + * current TX sequence number. Using TX_SEQ_VALID for SIOCSIWENCODEEXT and + * RX_SEQ_VALID for SIOCGIWENCODEEXT are optional, but can be useful for + * debugging/testing. + */ +struct iw_encode_ext +{ + __u32 ext_flags; /* IW_ENCODE_EXT_* */ + __u8 tx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ + __u8 rx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ + struct sockaddr addr; /* ff:ff:ff:ff:ff:ff for broadcast/multicast + * (group) keys or unicast address for + * individual keys */ + __u16 alg; /* IW_ENCODE_ALG_* */ + __u16 key_len; + __u8 key[0]; +}; + +/* SIOCSIWMLME data */ +struct iw_mlme +{ + __u16 cmd; /* IW_MLME_* */ + __u16 reason_code; + struct sockaddr addr; +}; + +/* SIOCSIWPMKSA data */ +#define IW_PMKSA_ADD 1 +#define IW_PMKSA_REMOVE 2 +#define IW_PMKSA_FLUSH 3 + +#define IW_PMKID_LEN 16 + +struct iw_pmksa +{ + __u32 cmd; /* IW_PMKSA_* */ + struct sockaddr bssid; + __u8 pmkid[IW_PMKID_LEN]; +}; + +/* IWEVMICHAELMICFAILURE data */ +struct iw_michaelmicfailure +{ + __u32 flags; + struct sockaddr src_addr; + __u8 tsc[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ +}; + +/* IWEVPMKIDCAND data */ +#define IW_PMKID_CAND_PREAUTH 0x00000001 /* RNS pre-authentication enabled */ +struct iw_pmkid_cand +{ + __u32 flags; /* IW_PMKID_CAND_* */ + __u32 index; /* the smaller the index, the higher the + * priority */ + struct sockaddr bssid; +}; + /* ------------------------ WIRELESS STATS ------------------------ */ /* * Wireless statistics (used for /proc/net/wireless) @@ -725,6 +1000,8 @@ struct iw_range struct iw_freq freq[IW_MAX_FREQUENCIES]; /* list */ /* Note : this frequency list doesn't need to fit channel numbers, * because each entry contain its channel index */ + + __u32 enc_capa; /* IW_ENC_CAPA_* bit field */ }; /* -- cgit v1.2.1 From c04049939f88b29e235d2da217bce6e8ead44f32 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 13 May 2005 18:17:42 +0100 Subject: AUDIT: Add message types to audit records This patch adds more messages types to the audit subsystem so that audit analysis is quicker, intuitive, and more useful. Signed-off-by: Steve Grubb --- I forgot one type in the big patch. I need to add one for user space originating SE Linux avc messages. This is used by dbus and nscd. -Steve --- Updated to 2.6.12-rc4-mm1. -dwmw2 Signed-off-by: David Woodhouse --- include/linux/audit.h | 66 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 405332ebf3c6..1a15ba38c660 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -27,15 +27,53 @@ #include #include -/* Request and reply types */ +/* The netlink messages for the audit system is divided into blocks: + * 1000 - 1099 are for commanding the audit system + * 1100 - 1199 user space trusted application messages + * 1200 - 1299 messages internal to the audit daemon + * 1300 - 1399 audit event messages + * 1400 - 1499 SE Linux use + * 1500 - 1999 future use + * 2000 is for otherwise unclassified kernel audit messages + * + * Messages from 1000-1199 are bi-directional. 1200-1299 are exclusively user + * space. Anything over that is kernel --> user space communication. + */ #define AUDIT_GET 1000 /* Get status */ #define AUDIT_SET 1001 /* Set status (enable/disable/auditd) */ -#define AUDIT_LIST 1002 /* List filtering rules */ -#define AUDIT_ADD 1003 /* Add filtering rule */ -#define AUDIT_DEL 1004 /* Delete filtering rule */ -#define AUDIT_USER 1005 /* Send a message from user-space */ +#define AUDIT_LIST 1002 /* List syscall filtering rules */ +#define AUDIT_ADD 1003 /* Add syscall filtering rule */ +#define AUDIT_DEL 1004 /* Delete syscall filtering rule */ +#define AUDIT_USER 1005 /* Message from userspace -- deprecated */ #define AUDIT_LOGIN 1006 /* Define the login id and information */ -#define AUDIT_SIGNAL_INFO 1010 /* Get information about sender of signal*/ +#define AUDIT_WATCH_INS 1007 /* Insert file/dir watch entry */ +#define AUDIT_WATCH_REM 1008 /* Remove file/dir watch entry */ +#define AUDIT_WATCH_LIST 1009 /* List all file/dir watches */ +#define AUDIT_SIGNAL_INFO 1010 /* Get info about sender of signal to auditd */ + +#define AUDIT_USER_AUTH 1100 /* User space authentication */ +#define AUDIT_USER_ACCT 1101 /* User space acct change */ +#define AUDIT_USER_MGMT 1102 /* User space acct management */ +#define AUDIT_CRED_ACQ 1103 /* User space credential acquired */ +#define AUDIT_CRED_DISP 1104 /* User space credential disposed */ +#define AUDIT_USER_START 1105 /* User space session start */ +#define AUDIT_USER_END 1106 /* User space session end */ +#define AUDIT_USER_AVC 1107 /* User space avc message */ + +#define AUDIT_DAEMON_START 1200 /* Daemon startup record */ +#define AUDIT_DAEMON_END 1201 /* Daemon normal stop record */ +#define AUDIT_DAEMON_ABORT 1202 /* Daemon error stop record */ +#define AUDIT_DAEMON_CONFIG 1203 /* Daemon config change */ + +#define AUDIT_SYSCALL 1300 /* Syscall event */ +#define AUDIT_FS_WATCH 1301 /* Filesystem watch event */ +#define AUDIT_PATH 1302 /* Filname path information */ +#define AUDIT_IPC 1303 /* IPC record */ +#define AUDIT_SOCKET 1304 /* Socket record */ +#define AUDIT_CONFIG_CHANGE 1305 /* Audit system configuration change */ + +#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ +#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ @@ -216,14 +254,11 @@ extern void audit_signal_info(int sig, struct task_struct *t); #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ -#define audit_log(ctx, fmt, args...) \ - audit_log_type(ctx, AUDIT_KERNEL, 0, fmt, ##args) -extern void audit_log_type(struct audit_context *ctx, int type, - int pid, const char *fmt, ...) - __attribute__((format(printf,4,5))); +extern void audit_log(struct audit_context *ctx, int type, + const char *fmt, ...) + __attribute__((format(printf,3,4))); -extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int type, - int pid); +extern struct audit_buffer *audit_log_start(struct audit_context *ctx,int type); extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) __attribute__((format(printf,2,3))); @@ -243,9 +278,8 @@ extern void audit_send_reply(int pid, int seq, int type, void *payload, int size); extern void audit_log_lost(const char *message); #else -#define audit_log(c,f,...) do { ; } while (0) -#define audit_log_type(c,t,p,f,...) do { ; } while (0) -#define audit_log_start(c,t,p) ({ NULL; }) +#define audit_log(c,t,f,...) do { ; } while (0) +#define audit_log_start(c,t) ({ NULL; }) #define audit_log_vformat(b,f,a) do { ; } while (0) #define audit_log_format(b,f,...) do { ; } while (0) #define audit_log_end(b) do { ; } while (0) -- cgit v1.2.1 From 23f32d18aa589e228c5a9e12e0d0c67c9b5bcdce Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 13 May 2005 18:35:15 +0100 Subject: AUDIT: Fix some spelling errors I'm going through the kernel code and have a patch that corrects several spelling errors in comments. From: Steve Grubb Signed-off-by: David Woodhouse --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1a15ba38c660..51e5879af7fc 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -181,7 +181,7 @@ struct audit_message { struct audit_status { __u32 mask; /* Bit mask for valid entries */ - __u32 enabled; /* 1 = enabled, 0 = disbaled */ + __u32 enabled; /* 1 = enabled, 0 = disabled */ __u32 failure; /* Failure-to-log action */ __u32 pid; /* pid of auditd process */ __u32 rate_limit; /* messages rate limit (per second) */ -- cgit v1.2.1 From a1365275e745bb0a173c918a52bcdfa6ce122f7e Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 5 May 2005 15:14:15 -0700 Subject: [PATCH] DM9000 network driver This patch adds support for the davicom dm9000 network driver. The dm9000 is found on some embedded arm boards such as the pimx1 or the scb9328. Signed-off-by: Sascha Hauer Signed-off-by: Ben Dooks Signed-off-by: Andrew Morton diff -puN /dev/null drivers/net/dm9000.c --- include/linux/dm9000.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/dm9000.h (limited to 'include/linux') diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h new file mode 100644 index 000000000000..0008e2ad0c9f --- /dev/null +++ b/include/linux/dm9000.h @@ -0,0 +1,36 @@ +/* include/linux/dm9000.h + * + * Copyright (c) 2004 Simtec Electronics + * Ben Dooks + * + * Header file for dm9000 platform data + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * +*/ + +#ifndef __DM9000_PLATFORM_DATA +#define __DM9000_PLATFORM_DATA __FILE__ + +/* IO control flags */ + +#define DM9000_PLATF_8BITONLY (0x0001) +#define DM9000_PLATF_16BITONLY (0x0002) +#define DM9000_PLATF_32BITONLY (0x0004) + +/* platfrom data for platfrom device structure's platfrom_data field */ + +struct dm9000_plat_data { + unsigned int flags; + + /* allow replacement IO routines */ + + void (*inblk)(void __iomem *reg, void *data, int len); + void (*outblk)(void __iomem *reg, void *data, int len); + void (*dumpblk)(void __iomem *reg, int len); +}; + +#endif /* __DM9000_PLATFORM_DATA */ + -- cgit v1.2.1 From b3dd65f958354226275522b5a64157834bdc5415 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Thu, 21 Apr 2005 15:57:25 +0200 Subject: [PATCH] Generic HDLC update The attached patch updates generic HDLC to version 1.18. FR Cisco LMI production-tested. Please apply to Linux 2.6. Thanks. Changes: - doc updates - added Cisco LMI support to Frame-Relay code - cleaned hdlc_fr.c a bit, removed some orphaned #defines etc. - fixed a problem with non-functional LMI in FR DCE mode. - changed diagnostic messages to better conform to FR standards - all protocols: information about carrier changes (DCD line) is now printed to kernel logs. Signed-Off-By: Krzysztof Halasa --- include/linux/hdlc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index 503194e62fe1..ed2927ef1ff7 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -1,7 +1,7 @@ /* * Generic HDLC support routines for Linux * - * Copyright (C) 1999-2003 Krzysztof Halasa + * Copyright (C) 1999-2005 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -41,6 +41,7 @@ #define LMI_NONE 1 /* No LMI, all PVCs are static */ #define LMI_ANSI 2 /* ANSI Annex D */ #define LMI_CCITT 3 /* ITU-T Annex A */ +#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ #define HDLC_MAX_MTU 1500 /* Ethernet 1500 bytes */ #define HDLC_MAX_MRU (HDLC_MAX_MTU + 10 + 14 + 4) /* for ETH+VLAN over FR */ @@ -89,6 +90,7 @@ typedef struct pvc_device_struct { unsigned int deleted: 1; unsigned int fecn: 1; unsigned int becn: 1; + unsigned int bandwidth; /* Cisco LMI reporting only */ }state; }pvc_device; -- cgit v1.2.1 From 3ec3b2fba526ead2fa3f3d7c91924f39a0733749 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 17 May 2005 12:08:48 +0100 Subject: AUDIT: Capture sys_socketcall arguments and sockaddrs Signed-off-by: David Woodhouse --- include/linux/audit.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 51e5879af7fc..2f5dc60f8bbd 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -69,8 +69,9 @@ #define AUDIT_FS_WATCH 1301 /* Filesystem watch event */ #define AUDIT_PATH 1302 /* Filname path information */ #define AUDIT_IPC 1303 /* IPC record */ -#define AUDIT_SOCKET 1304 /* Socket record */ +#define AUDIT_SOCKETCALL 1304 /* sys_socketcall arguments */ #define AUDIT_CONFIG_CHANGE 1305 /* Audit system configuration change */ +#define AUDIT_SOCKADDR 1306 /* sockaddr copied as syscall arg */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -235,6 +236,8 @@ extern int audit_get_stamp(struct audit_context *ctx, extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); +extern int audit_socketcall(int nargs, unsigned long *args); +extern int audit_sockaddr(int len, void *addr); extern void audit_signal_info(int sig, struct task_struct *t); #else #define audit_alloc(t) ({ 0; }) @@ -248,6 +251,8 @@ extern void audit_signal_info(int sig, struct task_struct *t); #define audit_get_stamp(c,t,s) ({ 0; }) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_perms(q,u,g,m) ({ 0; }) +#define audit_socketcall(n,a) ({ 0; }) +#define audit_sockaddr(len, addr) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) #endif -- cgit v1.2.1 From 209aba03243ee42a22f8df8d08aa9963f62aec64 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 May 2005 10:21:07 +0100 Subject: AUDIT: Treat all user messages identically. It's silly to have to add explicit entries for new userspace messages as we invent them. Just treat all messages in the user range the same. Signed-off-by: David Woodhouse --- include/linux/audit.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2f5dc60f8bbd..17ea5d522d81 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -51,14 +51,8 @@ #define AUDIT_WATCH_LIST 1009 /* List all file/dir watches */ #define AUDIT_SIGNAL_INFO 1010 /* Get info about sender of signal to auditd */ -#define AUDIT_USER_AUTH 1100 /* User space authentication */ -#define AUDIT_USER_ACCT 1101 /* User space acct change */ -#define AUDIT_USER_MGMT 1102 /* User space acct management */ -#define AUDIT_CRED_ACQ 1103 /* User space credential acquired */ -#define AUDIT_CRED_DISP 1104 /* User space credential disposed */ -#define AUDIT_USER_START 1105 /* User space session start */ -#define AUDIT_USER_END 1106 /* User space session end */ -#define AUDIT_USER_AVC 1107 /* User space avc message */ +#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages uninteresting to kernel */ +#define AUDIT_LAST_USER_MSG 1199 #define AUDIT_DAEMON_START 1200 /* Daemon startup record */ #define AUDIT_DAEMON_END 1201 /* Daemon normal stop record */ @@ -173,13 +167,6 @@ #define AUDIT_ARCH_V850 (EM_V850|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#ifndef __KERNEL__ -struct audit_message { - struct nlmsghdr nlh; - char data[1200]; -}; -#endif - struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ -- cgit v1.2.1 From 867d1191fca388a79e4bb500dd85a9e871c96b99 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 24 Apr 2005 02:06:05 -0500 Subject: [SCSI] remove requeue feature from blk_insert_request() blk_insert_request() has a unobivous feature of requeuing a request setting REQ_SPECIAL|REQ_SOFTBARRIER. SCSI midlayer was the only user and as previous patches removed the usage, remove the feature from blk_insert_request(). Only special requests should be queued with blk_insert_request(). All requeueing should go through blk_requeue_request(). Signed-off-by: Tejun Heo Signed-off-by: James Bottomley --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ef1afc178c0a..4a99b76c5a33 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -544,7 +544,7 @@ extern void blk_end_sync_rq(struct request *rq); extern void blk_attempt_remerge(request_queue_t *, struct request *); extern void __blk_attempt_remerge(request_queue_t *, struct request *); extern struct request *blk_get_request(request_queue_t *, int, int); -extern void blk_insert_request(request_queue_t *, struct request *, int, void *, int); +extern void blk_insert_request(request_queue_t *, struct request *, int, void *); extern void blk_requeue_request(request_queue_t *, struct request *); extern void blk_plug_device(request_queue_t *); extern int blk_remove_plug(request_queue_t *); -- cgit v1.2.1 From daa6eda65a53e5addf86c6bc829129ff51b08bda Mon Sep 17 00:00:00 2001 From: Gerd Knorr Date: Tue, 10 May 2005 10:59:13 +0200 Subject: [SCSI] add scsi changer driver This patch adds a device driver for scsi media changer devices. Signed-off-by: Gerd Knorr Signed-off-by: James Bottomley --- include/linux/chio.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/major.h | 1 + 2 files changed, 169 insertions(+) create mode 100644 include/linux/chio.h (limited to 'include/linux') diff --git a/include/linux/chio.h b/include/linux/chio.h new file mode 100644 index 000000000000..63035ae67e63 --- /dev/null +++ b/include/linux/chio.h @@ -0,0 +1,168 @@ +/* + * ioctl interface for the scsi media changer driver + */ + +/* changer element types */ +#define CHET_MT 0 /* media transport element (robot) */ +#define CHET_ST 1 /* storage element (media slots) */ +#define CHET_IE 2 /* import/export element */ +#define CHET_DT 3 /* data transfer element (tape/cdrom/whatever) */ +#define CHET_V1 4 /* vendor specific #1 */ +#define CHET_V2 5 /* vendor specific #2 */ +#define CHET_V3 6 /* vendor specific #3 */ +#define CHET_V4 7 /* vendor specific #4 */ + + +/* + * CHIOGPARAMS + * query changer properties + * + * CHIOVGPARAMS + * query vendor-specific element types + * + * accessing elements works by specifing type and unit of the element. + * for eample, storage elements are addressed with type = CHET_ST and + * unit = 0 .. cp_nslots-1 + * + */ +struct changer_params { + int cp_curpicker; /* current transport element */ + int cp_npickers; /* number of transport elements (CHET_MT) */ + int cp_nslots; /* number of storage elements (CHET_ST) */ + int cp_nportals; /* number of import/export elements (CHET_IE) */ + int cp_ndrives; /* number of data transfer elements (CHET_DT) */ +}; +struct changer_vendor_params { + int cvp_n1; /* number of vendor specific elems (CHET_V1) */ + char cvp_label1[16]; + int cvp_n2; /* number of vendor specific elems (CHET_V2) */ + char cvp_label2[16]; + int cvp_n3; /* number of vendor specific elems (CHET_V3) */ + char cvp_label3[16]; + int cvp_n4; /* number of vendor specific elems (CHET_V4) */ + char cvp_label4[16]; + int reserved[8]; +}; + + +/* + * CHIOMOVE + * move a medium from one element to another + */ +struct changer_move { + int cm_fromtype; /* type/unit of source element */ + int cm_fromunit; + int cm_totype; /* type/unit of destination element */ + int cm_tounit; + int cm_flags; +}; +#define CM_INVERT 1 /* flag: rotate media (for double-sided like MOD) */ + + +/* + * CHIOEXCHANGE + * move one medium from element #1 to element #2, + * and another one from element #2 to element #3. + * element #1 and #3 are allowed to be identical. + */ +struct changer_exchange { + int ce_srctype; /* type/unit of element #1 */ + int ce_srcunit; + int ce_fdsttype; /* type/unit of element #2 */ + int ce_fdstunit; + int ce_sdsttype; /* type/unit of element #3 */ + int ce_sdstunit; + int ce_flags; +}; +#define CE_INVERT1 1 +#define CE_INVERT2 2 + + +/* + * CHIOPOSITION + * move the transport element (robot arm) to a specific element. + */ +struct changer_position { + int cp_type; + int cp_unit; + int cp_flags; +}; +#define CP_INVERT 1 + + +/* + * CHIOGSTATUS + * get element status for all elements of a specific type + */ +struct changer_element_status { + int ces_type; + unsigned char *ces_data; +}; +#define CESTATUS_FULL 0x01 /* full */ +#define CESTATUS_IMPEXP 0x02 /* media was imported (inserted by sysop) */ +#define CESTATUS_EXCEPT 0x04 /* error condition */ +#define CESTATUS_ACCESS 0x08 /* access allowed */ +#define CESTATUS_EXENAB 0x10 /* element can export media */ +#define CESTATUS_INENAB 0x20 /* element can import media */ + + +/* + * CHIOGELEM + * get more detailed status informtion for a single element + */ +struct changer_get_element { + int cge_type; /* type/unit */ + int cge_unit; + int cge_status; /* status */ + int cge_errno; /* errno */ + int cge_srctype; /* source element of the last move/exchange */ + int cge_srcunit; + int cge_id; /* scsi id (for data transfer elements) */ + int cge_lun; /* scsi lun (for data transfer elements) */ + char cge_pvoltag[36]; /* primary volume tag */ + char cge_avoltag[36]; /* alternate volume tag */ + int cge_flags; +}; +/* flags */ +#define CGE_ERRNO 0x01 /* errno available */ +#define CGE_INVERT 0x02 /* media inverted */ +#define CGE_SRC 0x04 /* media src available */ +#define CGE_IDLUN 0x08 /* ID+LUN available */ +#define CGE_PVOLTAG 0x10 /* primary volume tag available */ +#define CGE_AVOLTAG 0x20 /* alternate volume tag available */ + + +/* + * CHIOSVOLTAG + * set volume tag + */ +struct changer_set_voltag { + int csv_type; /* type/unit */ + int csv_unit; + char csv_voltag[36]; /* volume tag */ + int csv_flags; +}; +#define CSV_PVOLTAG 0x01 /* primary volume tag */ +#define CSV_AVOLTAG 0x02 /* alternate volume tag */ +#define CSV_CLEARTAG 0x04 /* clear volume tag */ + +/* ioctls */ +#define CHIOMOVE _IOW('c', 1,struct changer_move) +#define CHIOEXCHANGE _IOW('c', 2,struct changer_exchange) +#define CHIOPOSITION _IOW('c', 3,struct changer_position) +#define CHIOGPICKER _IOR('c', 4,int) /* not impl. */ +#define CHIOSPICKER _IOW('c', 5,int) /* not impl. */ +#define CHIOGPARAMS _IOR('c', 6,struct changer_params) +#define CHIOGSTATUS _IOW('c', 8,struct changer_element_status) +#define CHIOGELEM _IOW('c',16,struct changer_get_element) +#define CHIOINITELEM _IO('c',17) +#define CHIOSVOLTAG _IOW('c',18,struct changer_set_voltag) +#define CHIOGVPARAMS _IOR('c',19,struct changer_vendor_params) + +/* ---------------------------------------------------------------------- */ + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/include/linux/major.h b/include/linux/major.h index 4b62c42b842c..e36a46702d94 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -100,6 +100,7 @@ #define I2O_MAJOR 80 /* 80->87 */ #define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */ +#define SCSI_CHANGER_MAJOR 86 #define IDE6_MAJOR 88 #define IDE7_MAJOR 89 -- cgit v1.2.1 From 011161051bbc25f7f8b7df059dbd934c534443f0 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Sat, 21 May 2005 00:15:52 +0100 Subject: AUDIT: Avoid sleeping function in SElinux AVC audit. This patch changes the SELinux AVC to defer logging of paths to the audit framework upon syscall exit, by saving a reference to the (dentry,vfsmount) pair in an auxiliary audit item on the current audit context for processing by audit_log_exit. Signed-off-by: Stephen Smalley Signed-off-by: David Woodhouse --- include/linux/audit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 17ea5d522d81..4b7caf0c6e10 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -69,6 +69,7 @@ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ +#define AUDIT_AVC_PATH 1402 /* dentry, vfsmount pair from avc */ #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ @@ -225,6 +226,7 @@ extern uid_t audit_get_loginuid(struct audit_context *ctx); extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); +extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); #else #define audit_alloc(t) ({ 0; }) @@ -240,6 +242,7 @@ extern void audit_signal_info(int sig, struct task_struct *t); #define audit_ipc_perms(q,u,g,m) ({ 0; }) #define audit_socketcall(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) +#define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) #endif -- cgit v1.2.1 From bfb4496e7239c9132d732a65cdcf3d6a7431ad1a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 21 May 2005 21:08:09 +0100 Subject: AUDIT: Assign serial number to non-syscall messages Move audit_serial() into audit.c and use it to generate serial numbers on messages even when there is no audit context from syscall auditing. This allows us to disambiguate audit records when more than one is generated in the same millisecond. Based on a patch by Steve Grubb after he observed the problem. Signed-off-by: David Woodhouse --- include/linux/audit.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 4b7caf0c6e10..3278ddf41ce6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -219,8 +219,9 @@ extern void audit_inode(const char *name, const struct inode *inode); /* Private API (for audit.c only) */ extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, uid_t loginuid); -extern int audit_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial); +extern unsigned int audit_serial(void); +extern void auditsc_get_stamp(struct audit_context *ctx, + struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); @@ -237,7 +238,7 @@ extern void audit_signal_info(int sig, struct task_struct *t); #define audit_putname(n) do { ; } while (0) #define audit_inode(n,i) do { ; } while (0) #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) -#define audit_get_stamp(c,t,s) ({ 0; }) +#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_perms(q,u,g,m) ({ 0; }) #define audit_socketcall(n,a) ({ 0; }) -- cgit v1.2.1 From 8604affde9d4f52f04342d6a37c77d95fa167e7a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 May 2005 14:55:34 +0200 Subject: [PATCH] convert IDE device drivers to driver-model * add ide_bus_match() and export ide_bus_type * split ide_remove_driver_from_hwgroup() out of ide_unregister() * move device cleanup from ide_unregister() to drive_release_dev() * convert ide_driver_t->name to driver->name * convert ide_driver_t->{attach,cleanup} to driver->{probe,remove} * remove ide_driver_t->busy as ide_bus_type->subsys.rwsem protects against concurrent ->{probe,remove} calls * make ide_{un}register_driver() void as it cannot fail now * use driver_{un}register() directly, remove ide_{un}register_driver() * use device_register() instead of ata_attach(), remove ata_attach() * add proc_print_driver() and ide_drivers_show(), remove ide_drivers_op * fix ide_replace_subdriver() and move it to ide-proc.c * remove ide_driver_t->drives, ide_drives and drives_lock * remove ide_driver_t->drivers, drivers and drivers_lock * remove ide_drive_t->driver and DRIVER() macro Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9cfc0999becb..336d6e509f59 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -664,7 +664,6 @@ typedef struct ide_drive_s { struct request *rq; /* current request */ struct ide_drive_s *next; /* circular list of hwgroup drives */ - struct ide_driver_s *driver;/* (ide_driver_t *) */ void *driver_data; /* extra driver data */ struct hd_driveid *id; /* drive model identification info */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ @@ -758,6 +757,8 @@ typedef struct ide_drive_s { struct semaphore gendev_rel_sem; /* to deal with device release() */ } ide_drive_t; +#define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) + #define IDE_CHIPSET_PCI_MASK \ ((1<> (c)) & 1) @@ -1086,28 +1087,20 @@ enum { */ typedef struct ide_driver_s { struct module *owner; - const char *name; const char *version; u8 media; - unsigned busy : 1; unsigned supports_dsc_overlap : 1; - int (*cleanup)(ide_drive_t *); ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); ide_startstop_t (*abort)(ide_drive_t *, struct request *rq); int (*ioctl)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long); ide_proc_entry_t *proc; - int (*attach)(ide_drive_t *); void (*ata_prebuilder)(ide_drive_t *); void (*atapi_prebuilder)(ide_drive_t *); struct device_driver gen_driver; - struct list_head drives; - struct list_head drivers; } ide_driver_t; -#define DRIVER(drive) ((drive)->driver) - int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); /* @@ -1328,8 +1321,6 @@ extern void ide_init_subdrivers(void); void ide_init_disk(struct gendisk *, ide_drive_t *); -extern int ata_attach(ide_drive_t *); - extern int ideprobe_init(void); extern void ide_scan_pcibus(int scan_direction) __init; @@ -1342,11 +1333,8 @@ extern void default_hwif_iops(ide_hwif_t *); extern void default_hwif_mmiops(ide_hwif_t *); extern void default_hwif_transport(ide_hwif_t *); -int ide_register_driver(ide_driver_t *driver); -void ide_unregister_driver(ide_driver_t *driver); -int ide_register_subdriver(ide_drive_t *, ide_driver_t *); -int ide_unregister_subdriver (ide_drive_t *drive); -int ide_replace_subdriver(ide_drive_t *drive, const char *driver); +void ide_register_subdriver(ide_drive_t *, ide_driver_t *); +void ide_unregister_subdriver(ide_drive_t *, ide_driver_t *); #define ON_BOARD 1 #define NEVER_BOARD 0 -- cgit v1.2.1 From 0dca51d362b8e4af6b0dbc9e54d1e5165341918a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 26 May 2005 12:55:48 -0700 Subject: [PKT_SCHED] netem: allow random reordering (with fix) Here is a fixed up version of the reorder feature of netem. It is the same as the earlier patch plus with the bugfix from Julio merged in. Has expected backwards compatibility behaviour. Go ahead and merge this one, the TCP strangeness I was seeing was due to the reordering bug, and previous version of TSO patch. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 73d84c071cb1..1d9da36eb9db 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -427,6 +427,7 @@ enum TCA_NETEM_UNSPEC, TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, __TCA_NETEM_MAX, }; @@ -437,7 +438,7 @@ struct tc_netem_qopt __u32 latency; /* added delay (us) */ __u32 limit; /* fifo limit (packets) */ __u32 loss; /* random packet loss (0=none ~0=100%) */ - __u32 gap; /* re-ordering gap (0 for delay all) */ + __u32 gap; /* re-ordering gap (0 for none) */ __u32 duplicate; /* random packet dup (0=none ~0=100%) */ __u32 jitter; /* random jitter in latency (us) */ }; @@ -449,6 +450,12 @@ struct tc_netem_corr __u32 dup_corr; /* duplicate correlation */ }; +struct tc_netem_reorder +{ + __u32 probability; + __u32 correlation; +}; + #define NETEM_DIST_SCALE 8192 #endif -- cgit v1.2.1 From c6b3365391c626206f6789354794a81a010cb7a1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 26 May 2005 12:59:05 -0700 Subject: [TOKENRING]: be'ify trh_hdr, trllc, rif_cache_s Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/if_tr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index a2b01e1e72f2..6688b414c529 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -36,8 +36,8 @@ struct trh_hdr { __u8 fc; /* frame control field */ __u8 daddr[TR_ALEN]; /* destination address */ __u8 saddr[TR_ALEN]; /* source address */ - __u16 rcf; /* route control field */ - __u16 rseg[8]; /* routing registers */ + __be16 rcf; /* route control field */ + __be16 rseg[8]; /* routing registers */ }; #ifdef __KERNEL__ @@ -55,7 +55,7 @@ struct trllc { __u8 ssap; /* source SAP */ __u8 llc; /* LLC control field */ __u8 protid[3]; /* protocol id */ - __u16 ethertype; /* ether type field */ + __be16 ethertype; /* ether type field */ }; /* Token-Ring statistics collection data. */ -- cgit v1.2.1 From b6016b767397258b58163494a869f8f1199e6897 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 May 2005 13:03:09 -0700 Subject: [BNX2]: New Broadcom gigabit network driver. A new driver bnx2 for Broadcom bcm5706 is available. The patch also includes new 1000BASE-X advertisement bit definitions in mii.h Thanks to David Miller and Jeff Garzik for reviewing and their valuable feedback. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/mii.h | 8 ++++++++ include/linux/pci_ids.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 20971fe78a8d..374b615ea9ea 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -65,9 +65,13 @@ #define ADVERTISE_SLCT 0x001f /* Selector bits */ #define ADVERTISE_CSMA 0x0001 /* Only selector supported */ #define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_1000XFULL 0x0020 /* Try for 1000BASE-X full-duplex */ #define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_1000XHALF 0x0040 /* Try for 1000BASE-X half-duplex */ #define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_1000XPAUSE 0x0080 /* Try for 1000BASE-X pause */ #define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_1000XPSE_ASYM 0x0100 /* Try for 1000BASE-X asym pause */ #define ADVERTISE_100BASE4 0x0200 /* Try for 100mbps 4k packets */ #define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ #define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymetric pause */ @@ -84,9 +88,13 @@ /* Link partner ability register. */ #define LPA_SLCT 0x001f /* Same as advertise selector */ #define LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */ +#define LPA_1000XFULL 0x0020 /* Can do 1000BASE-X full-duplex */ #define LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */ +#define LPA_1000XHALF 0x0040 /* Can do 1000BASE-X half-duplex */ #define LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */ +#define LPA_1000XPAUSE 0x0080 /* Can do 1000BASE-X pause */ #define LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */ +#define LPA_1000XPAUSE_ASYM 0x0100 /* Can do 1000BASE-X pause asym*/ #define LPA_100BASE4 0x0200 /* Can do 100mbps 4k packets */ #define LPA_PAUSE_CAP 0x0400 /* Can pause */ #define LPA_PAUSE_ASYM 0x0800 /* Can pause asymetrically */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7b9720e35361..7ccbc2e4272c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2071,6 +2071,7 @@ #define PCI_DEVICE_ID_TIGON3_5703 0x1647 #define PCI_DEVICE_ID_TIGON3_5704 0x1648 #define PCI_DEVICE_ID_TIGON3_5704S_2 0x1649 +#define PCI_DEVICE_ID_NX2_5706 0x164a #define PCI_DEVICE_ID_TIGON3_5702FE 0x164d #define PCI_DEVICE_ID_TIGON3_5705 0x1653 #define PCI_DEVICE_ID_TIGON3_5705_2 0x1654 @@ -2090,6 +2091,7 @@ #define PCI_DEVICE_ID_TIGON3_5702X 0x16a6 #define PCI_DEVICE_ID_TIGON3_5703X 0x16a7 #define PCI_DEVICE_ID_TIGON3_5704S 0x16a8 +#define PCI_DEVICE_ID_NX2_5706S 0x16aa #define PCI_DEVICE_ID_TIGON3_5702A3 0x16c6 #define PCI_DEVICE_ID_TIGON3_5703A3 0x16c7 #define PCI_DEVICE_ID_TIGON3_5781 0x16dd -- cgit v1.2.1 From aa8f0dc6c3dbf1cf3ff58f3e945c981be134814d Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 26 May 2005 21:54:27 -0400 Subject: libata: Fix use-after-iounmap Jens Axboe pointed out that the iounmap() call in libata was occurring too early, and some drivers (ahci, probably others) were using ioremap'd memory after it had been unmapped. The patch should address that problem by way of improving the libata driver API: * move ->host_stop() call after all ->port_stop() calls have occurred. * create default helper function ata_host_stop(), and move iounmap() call there. * add ->host_stop_prewalk() hook, use it in sata_qstor.c (hi Mark). sata_qstor appears to require the host-stop-before-port-stop ordering that existed prior to applying the attached patch. --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1f7e2039a04e..e74f301e9bae 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -410,6 +410,7 @@ extern u8 ata_chk_err(struct ata_port *ap); extern void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf); extern int ata_port_start (struct ata_port *ap); extern void ata_port_stop (struct ata_port *ap); +extern void ata_host_stop (struct ata_host_set *host_set); extern irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs); extern void ata_qc_prep(struct ata_queued_cmd *qc); extern int ata_qc_issue_prot(struct ata_queued_cmd *qc); -- cgit v1.2.1 From 8f37d47c9bf74cb48692691086b482e315d07f40 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 27 May 2005 12:17:28 +0100 Subject: AUDIT: Record working directory when syscall arguments are pathnames Signed-off-by: David Woodhouse --- include/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 3278ddf41ce6..bf2ad3ba72eb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -61,11 +61,12 @@ #define AUDIT_SYSCALL 1300 /* Syscall event */ #define AUDIT_FS_WATCH 1301 /* Filesystem watch event */ -#define AUDIT_PATH 1302 /* Filname path information */ +#define AUDIT_PATH 1302 /* Filename path information */ #define AUDIT_IPC 1303 /* IPC record */ #define AUDIT_SOCKETCALL 1304 /* sys_socketcall arguments */ #define AUDIT_CONFIG_CHANGE 1305 /* Audit system configuration change */ #define AUDIT_SOCKADDR 1306 /* sockaddr copied as syscall arg */ +#define AUDIT_CWD 1307 /* Current working directory */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.1 From 25be5e6ccc5c9cab77012fabbb72520f9b6e4618 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 May 2005 04:21:50 -0400 Subject: [PATCH] VIA IRQ quirk Delete quirk_via_bridge(), restore quirk_via_irqpic() -- but now improved to be invoked upon device ENABLE, and now only for VIA devices -- not all devices behind VIA bridges. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index aefe6d051ace..d5a55bdb9c3c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -25,6 +25,8 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#ifdef CONFIG_ACPI + #ifndef _LINUX #define _LINUX #endif @@ -533,4 +535,5 @@ static inline int acpi_get_pxm(acpi_handle handle) extern int pnpacpi_disabled; -#endif /*_LINUX_ACPI_H*/ +#endif /* CONFIG_ACPI */ +#endif /*_LINUX_ACPI_H*/ -- cgit v1.2.1 From 668d1e6093110f7534e661e2ff43d54c74659b6d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 28 May 2005 02:11:12 -0500 Subject: Input: This patch adds dummy gameport_register_port, gameport_unregister_port and gameport_set_phys functions to gameport.h for the case when a driver can't use gameport. This fixes the compilation of some OSS drivers with GAMEPORT=n without the need to #if inside every single driver. This patch also removes the non-working and now obsolete SOUND_GAMEPORT. This patch is also an alternative solution for ALSA drivers with similar problems (but #if's inside the drivers might have the advantage of saving some more bytes of gameport is not available). The only user-visible change is that for GAMEPORT=m the affected OSS drivers are now allowed to be built statically (but they won't have gameport support). Signed-off-by: Adrian Bunk Signed-off-by: Vojtech Pavlik Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index b1272f822cfa..cd623eccdbea 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -67,6 +67,8 @@ int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mo void gameport_close(struct gameport *gameport); void gameport_rescan(struct gameport *gameport); +#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE)) + void __gameport_register_port(struct gameport *gameport, struct module *owner); static inline void gameport_register_port(struct gameport *gameport) { @@ -75,6 +77,29 @@ static inline void gameport_register_port(struct gameport *gameport) void gameport_unregister_port(struct gameport *gameport); +void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +#else + +static inline void gameport_register_port(struct gameport *gameport) +{ + return; +} + +static inline void gameport_unregister_port(struct gameport *gameport) +{ + return; +} + +static inline void gameport_set_phys(struct gameport *gameport, + const char *fmt, ...) +{ + return; +} + +#endif + static inline struct gameport *gameport_allocate_port(void) { struct gameport *gameport = kcalloc(1, sizeof(struct gameport), GFP_KERNEL); @@ -92,9 +117,6 @@ static inline void gameport_set_name(struct gameport *gameport, const char *name strlcpy(gameport->name, name, sizeof(gameport->name)); } -void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); - /* * Use the following fucntions to manipulate gameport's per-port * driver-specific data. -- cgit v1.2.1 From 8f28e8fa46625310102aea06fac61ba04c8b5b88 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 28 May 2005 15:52:02 -0700 Subject: [PATCH] irq code: Add coherence test for PREEMPT_ACTIVE After porting this fixlet to UML: http://linux.bkbits.net:8080/linux-2.5/cset@41791ab52lfMuF2i3V-eTIGRBbDYKQ , I've also added a warning which should refuse compilation with insane values for PREEMPT_ACTIVE... maybe we should simply move PREEMPT_ACTIVE out of architectures using GENERIC_IRQS. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ebc712e91066..8336dba18971 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -43,13 +43,17 @@ #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) -#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) +#error PREEMPT_ACTIVE is too low! +#endif + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) -- cgit v1.2.1 From d8a33ac435c43a1a404b2ec560ef1d1536710c36 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 May 2005 14:13:47 -0700 Subject: [BRIDGE]: features change notification Resend of earlier patch (no changes) from Catalin used to provide device feature change notification. Signed-off-by: Catalin BOIE Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/linux/notifier.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b25bd02720d3..d8c65ecef9d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -913,6 +913,7 @@ extern void dev_mc_discard(struct net_device *dev); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); +extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(const char *name); extern void dev_mcast_init(void); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 9303a003e9ab..5937dd6053c3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -56,6 +56,7 @@ extern int notifier_call_chain(struct notifier_block **n, unsigned long val, voi #define NETDEV_CHANGEADDR 0x0008 #define NETDEV_GOING_DOWN 0x0009 #define NETDEV_CHANGENAME 0x000A +#define NETDEV_FEAT_CHANGE 0x000B #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN -- cgit v1.2.1 From 81e8157583c559c27aac75c708d40a35f563d734 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 May 2005 14:14:35 -0700 Subject: [BRIDGE]: make dev->features unsigned The features field in netdevice is really a bitmask, and bitmask's should be unsigned. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8c65ecef9d9..470af8c1a4a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -401,7 +401,7 @@ struct net_device } reg_state; /* Net device features */ - int features; + unsigned long features; #define NETIF_F_SG 1 /* Scatter/gather IO. */ #define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -- cgit v1.2.1 From 8f937c6099858eee15fae14009dcbd05177fa91d Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 29 May 2005 20:23:46 -0700 Subject: [IPV4]: Primary and secondary addresses Add an option to make secondary IP addresses get promoted when primary IP addresses are removed from the device. It defaults to off to preserve existing behavior. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 ++ include/linux/sysctl.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 6fafb27877a7..7e1e15f934f3 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -29,6 +29,7 @@ struct ipv4_devconf int no_xfrm; int no_policy; int force_igmp_version; + int promote_secondaries; void *sysctl; }; @@ -71,6 +72,7 @@ struct in_device #define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag) #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id) +#define IN_DEV_PROMOTE_SECONDARIES(in_dev) (ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries) #define IN_DEV_RX_REDIRECTS(in_dev) \ ((IN_DEV_FORWARD(in_dev) && \ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 772998147e3e..23032d9d6071 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -399,6 +399,7 @@ enum NET_IPV4_CONF_FORCE_IGMP_VERSION=17, NET_IPV4_CONF_ARP_ANNOUNCE=18, NET_IPV4_CONF_ARP_IGNORE=19, + NET_IPV4_CONF_PROMOTE_SECONDARIES=20, __NET_IPV4_CONF_MAX }; -- cgit v1.2.1 From 79165121bc09c209451487d977df910c4ff6fc94 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 29 May 2005 20:24:30 -0700 Subject: [NET]: Add is_multicast_ether_addr() in include/linux/etherdevice.h This patch adds is_multicast_ether_addr() to go along with is_valid_ether_addr() and friends. It then changes is_valid_ether_addr() to use the new macro, and fixes up the comment on that function to move implementation details out of the API doco. Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 220748b7abea..a1478258d002 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -55,19 +55,33 @@ static inline int is_zero_ether_addr(const u8 *addr) return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); } +/** + * is_multicast_ether_addr - Determine if the given Ethernet address is a + * multicast address. + * + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is a multicast address. + */ +static inline int is_multicast_ether_addr(const u8 *addr) +{ + return addr[0] & 0x01; +} + /** * is_valid_ether_addr - Determine if the given Ethernet address is valid * @addr: Pointer to a six-byte array containing the Ethernet address * * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not - * a multicast address, and is not FF:FF:FF:FF:FF:FF. The multicast - * and FF:FF:... tests are combined into the single test "!(addr[0]&1)". + * a multicast address, and is not FF:FF:FF:FF:FF:FF. * * Return true if the address is valid. */ static inline int is_valid_ether_addr(const u8 *addr) { - return !(addr[0]&1) && !is_zero_ether_addr(addr); + /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to + * explicitly check for it here. */ + return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } /** @@ -83,6 +97,6 @@ static inline void random_ether_addr(u8 *addr) addr [0] &= 0xfe; /* clear multicast bit */ addr [0] |= 0x02; /* set local assignment bit (IEEE802) */ } -#endif +#endif /* __KERNEL__ */ #endif /* _LINUX_ETHERDEVICE_H */ -- cgit v1.2.1 From 69f6a0fafcdf0bfe85af182695d6d38f80f9d549 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sun, 29 May 2005 20:27:24 -0700 Subject: [NET]: Add ethtool support for NETIF_F_HW_CSUM. Signed-off-by: Jon Mason Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c85b210490ea..a0ab26aab450 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -256,6 +256,7 @@ struct net_device; u32 ethtool_op_get_link(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); +int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); -- cgit v1.2.1 From af00f9811e0ccbd3db84ddc4cffb0da942653393 Mon Sep 17 00:00:00 2001 From: Andy Currid Date: Mon, 23 May 2005 08:55:45 -0700 Subject: [PATCH] PCI: amd74xx patch for new NVIDIA device IDs Here's the 2.6 amd74xx patch for NVIDIA MCP51. Signed-off-by: Andy Currid Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7ccbc2e4272c..b0d6134e1ee6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1230,6 +1230,12 @@ #define PCI_DEVICE_ID_NVIDIA_QUADRO4_900XGL 0x0258 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_750XGL 0x0259 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_700XGL 0x025B +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE 0x0265 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 +#define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 +#define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 +#define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800 0x0280 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X 0x0281 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE 0x0282 -- cgit v1.2.1 From b53cc6ead046093477ec7a3354d620337101ea5b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:47 -0700 Subject: [CPUFREQ] fix up comment in cpufreq.h Fix up comment in cpufreq.h stating transition latency should be passed in microseconds -- it was decided long ago to switch to nanoseconds. Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f21af067d015..927daa86c9b3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -49,7 +49,7 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); /* Frequency values here are CPU kHz so that hardware which doesn't run * with some frequencies can complain without having to guess what per * cent / per mille means. - * Maximum transition latency is in microseconds - if it's unknown, + * Maximum transition latency is in nanoseconds - if it's unknown, * CPUFREQ_ETERNAL shall be used. */ -- cgit v1.2.1 From 0baab86b00cdf9785ac2bb2ce1ab63995b3866ca Mon Sep 17 00:00:00 2001 From: Edward Falk Date: Thu, 2 Jun 2005 18:17:13 -0400 Subject: libata: update inline source docs --- include/linux/libata.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1f7e2039a04e..ad410590664f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -466,12 +466,34 @@ static inline u8 ata_chk_status(struct ata_port *ap) return ap->ops->check_status(ap); } + +/** + * ata_pause - Flush writes and pause 400 nanoseconds. + * @ap: Port to wait for. + * + * LOCKING: + * Inherited from caller. + */ + static inline void ata_pause(struct ata_port *ap) { ata_altstatus(ap); ndelay(400); } + +/** + * ata_busy_wait - Wait for a port status register + * @ap: Port to wait for. + * + * Waits up to max*10 microseconds for the selected bits in the port's + * status register to be cleared. + * Returns final value of status register. + * + * LOCKING: + * Inherited from caller. + */ + static inline u8 ata_busy_wait(struct ata_port *ap, unsigned int bits, unsigned int max) { @@ -486,6 +508,18 @@ static inline u8 ata_busy_wait(struct ata_port *ap, unsigned int bits, return status; } + +/** + * ata_wait_idle - Wait for a port to be idle. + * @ap: Port to wait for. + * + * Waits up to 10ms for port's BUSY and DRQ signals to clear. + * Returns final value of status register. + * + * LOCKING: + * Inherited from caller. + */ + static inline u8 ata_wait_idle(struct ata_port *ap) { u8 status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); @@ -524,6 +558,18 @@ static inline void ata_tf_init(struct ata_port *ap, struct ata_taskfile *tf, uns tf->device = ATA_DEVICE_OBS | ATA_DEV1; } + +/** + * ata_irq_on - Enable interrupts on a port. + * @ap: Port on which interrupts are enabled. + * + * Enable interrupts on a legacy IDE device using MMIO or PIO, + * wait for idle, clear any pending interrupts. + * + * LOCKING: + * Inherited from caller. + */ + static inline u8 ata_irq_on(struct ata_port *ap) { struct ata_ioports *ioaddr = &ap->ioaddr; @@ -543,6 +589,18 @@ static inline u8 ata_irq_on(struct ata_port *ap) return tmp; } + +/** + * ata_irq_ack - Acknowledge a device interrupt. + * @ap: Port on which interrupts are enabled. + * + * Wait up to 10 ms for legacy IDE device to become idle (BUSY + * or BUSY+DRQ clear). Obtain dma status and port status from + * device. Clear the interrupt. Return port status. + * + * LOCKING: + */ + static inline u8 ata_irq_ack(struct ata_port *ap, unsigned int chk_drq) { unsigned int bits = chk_drq ? ATA_BUSY | ATA_DRQ : ATA_BUSY; -- cgit v1.2.1 From b597ef4712c05c962640a655386a7f06cc1a1fbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Jun 2005 16:36:00 -0700 Subject: [NET]: Fix locking in shaper driver. o use a semaphore instead of an opencoded and racy lock o move locking out of shaper_kick and into the callers - most just released the lock before calling shaper_kick o remove in_interrupt() tests. from ->close we can always block, from ->hard_start_xmit and timer context never Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/if_shaper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h index 0485b256d043..004e6f09a6e2 100644 --- a/include/linux/if_shaper.h +++ b/include/linux/if_shaper.h @@ -23,7 +23,7 @@ struct shaper __u32 shapeclock; unsigned long recovery; /* Time we can next clock a packet out on an empty queue */ - unsigned long locked; + struct semaphore sem; struct net_device_stats stats; struct net_device *dev; int (*hard_start_xmit) (struct sk_buff *skb, @@ -38,7 +38,6 @@ struct shaper int (*hard_header_cache)(struct neighbour *neigh, struct hh_cache *hh); void (*header_cache_update)(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr); struct net_device_stats* (*get_stats)(struct net_device *dev); - wait_queue_head_t wait_queue; struct timer_list timer; }; -- cgit v1.2.1 From 5ba0eac6e0b7e2889649a1105d97c600595e2bb1 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 2 Jun 2005 16:48:05 -0700 Subject: [NET]: Fix HH_DATA_OFF. When the hardware header size is a multiple of HH_DATA_MOD, HH_DATA_OFF() incorrectly returns HH_DATA_MOD (instead of 0). This affects ieee80211 layer as 802.11 header is 32 bytes long. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 470af8c1a4a0..ba5d1236aa17 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -204,7 +204,7 @@ struct hh_cache /* cached hardware header; allow for machine alignment needs. */ #define HH_DATA_MOD 16 #define HH_DATA_OFF(__len) \ - (HH_DATA_MOD - ((__len) & (HH_DATA_MOD - 1))) + (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) #define HH_DATA_ALIGN(__len) \ (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; -- cgit v1.2.1 From 719df469cb51199316ae2a11c75a8046be34b899 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 6 May 2005 00:55:56 +0400 Subject: [PATCH] USB: update urb documentation On Wed, May 04, 2005 at 01:37:30PM -0700, David Brownell wrote: > On Wednesday 04 May 2005 12:19 pm, Roman Kagan wrote: > > struct urb { > > /* private, usb core and host controller only fields in the urb */ > > ... > > struct list_head urb_list; /* list pointer to all active urbs */ > > ... > > }; > > > > Is it safe to use it for driver's purposes when the driver owns the urb, > > that is, starting from the completion routine until the urb is submitted > > with usb_submit_urb()? > > Right now, it should be. Great! FWIW I've briefly tested a modified version of usbatm using the list head in struct urb instead of creating a wrapper struct, and I haven't seen any failures yet. So I tend to believe that your "should be" actually means "is" :) > > If it is, can it be guaranteed in future, e.g. > > by moving the list head into the public section of struct urb? > > In fact I'm not sure why it ever got called "private" to usbcore/hcds. > I thought the idea was that it should be like urb->status, reserved for > whoever controls the URB. OK then how about the following (essentially documentation) patch? Signed-off-by: Roman Kagan Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 41d1a644c9d4..2d1ac5058534 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -796,6 +796,10 @@ typedef void (*usb_complete_t)(struct urb *, struct pt_regs *); * of the iso_frame_desc array, and the number of errors is reported in * error_count. Completion callbacks for ISO transfers will normally * (re)submit URBs to ensure a constant transfer rate. + * + * Note that even fields marked "public" should not be touched by the driver + * when the urb is owned by the hcd, that is, since the call to + * usb_submit_urb() till the entry into the completion routine. */ struct urb { @@ -803,12 +807,12 @@ struct urb struct kref kref; /* reference count of the URB */ spinlock_t lock; /* lock for the URB */ void *hcpriv; /* private data for host controller */ - struct list_head urb_list; /* list pointer to all active urbs */ int bandwidth; /* bandwidth for INT/ISO request */ atomic_t use_count; /* concurrent submissions counter */ u8 reject; /* submissions will fail */ /* public, documented fields in the urb that can be used by drivers */ + struct list_head urb_list; /* list head for use by the urb owner */ struct usb_device *dev; /* (in) pointer to associated device */ unsigned int pipe; /* (in) pipe information */ int status; /* (return) non-ISO status */ -- cgit v1.2.1 From 3f5948fa2cbbda1261eec9a39ef3004b3caf73fb Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Mon, 6 Jun 2005 15:50:09 -0700 Subject: [PATCH] Include before testing CONFIG_ACPI I'm not sure why this issue is suddenly showing, but without this patchlet, the zx1 config won't compile anymore (e.g., to see the compilation-error, look for "***" in [1]). [1] http://www.gelato.unsw.edu.au/kerncomp/results//2005-06-06-17-00/zx1_defconfig-log.html Signed-off-by: David Mosberger-Tang Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d5a55bdb9c3c..b123cc08773d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -25,6 +25,8 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#include + #ifdef CONFIG_ACPI #ifndef _LINUX -- cgit v1.2.1 From d0de98fa16169562bd74913c6c9b3857f9065c79 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Tue, 31 May 2005 19:50:49 +0100 Subject: [PATCH] i945G patch for agpgart Attached is a small patch for i945G support against 2.6.11.11. From: Alan Hourihane Signed-off-by: Dave Jones --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b0d6134e1ee6..18f734ec9181 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2382,6 +2382,8 @@ #define PCI_DEVICE_ID_INTEL_82915G_IG 0x2582 #define PCI_DEVICE_ID_INTEL_82915GM_HB 0x2590 #define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592 +#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 +#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 #define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641 #define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642 -- cgit v1.2.1 From 6d1cfbab4de64f2d0c5b0f81177ade0d75b69288 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Jun 2005 14:13:14 -0700 Subject: [TG3]: Fix 5700/5701 DMA corruption on Apple G4. Fix 5700/5701 DMA write corruption on Apple G4 by detecting the Apple UniNorth PCI 1.5 chipset and adjusting the DMA write boundary to 16. DMA test fails to detect the problem with this chipset. Thanks to Manuel Perez Ayala for reporting the problem and helping to debug it. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 18f734ec9181..b8b4ebf9abf1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -874,6 +874,7 @@ #define PCI_DEVICE_ID_APPLE_KL_USB_P 0x0026 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P 0x0027 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP15 0x002d +#define PCI_DEVICE_ID_APPLE_UNI_N_PCI15 0x002e #define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030 #define PCI_DEVICE_ID_APPLE_UNI_N_GMAC2 0x0032 #define PCI_DEVIEC_ID_APPLE_UNI_N_ATA 0x0033 -- cgit v1.2.1 From 4890062960cbc4d3cebdbd8261a68bc85efcf5d4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 8 Jun 2005 15:10:48 -0700 Subject: [PKT_SCHED]: Allow socket attributes to be matched on via meta ematch Adds meta collectors for all socket attributes that make sense to be filtered upon. Some of them are only useful for debugging but having them doesn't hurt. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/tc_ematch/tc_em_meta.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index aa6b48bb4dcd..a6b2cc530af5 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -56,6 +56,36 @@ enum TCF_META_ID_TCCLASSID, TCF_META_ID_RTCLASSID, TCF_META_ID_RTIIF, + TCF_META_ID_SK_FAMILY, + TCF_META_ID_SK_STATE, + TCF_META_ID_SK_REUSE, + TCF_META_ID_SK_BOUND_IF, + TCF_META_ID_SK_REFCNT, + TCF_META_ID_SK_SHUTDOWN, + TCF_META_ID_SK_PROTO, + TCF_META_ID_SK_TYPE, + TCF_META_ID_SK_RCVBUF, + TCF_META_ID_SK_RMEM_ALLOC, + TCF_META_ID_SK_WMEM_ALLOC, + TCF_META_ID_SK_OMEM_ALLOC, + TCF_META_ID_SK_WMEM_QUEUED, + TCF_META_ID_SK_RCV_QLEN, + TCF_META_ID_SK_SND_QLEN, + TCF_META_ID_SK_ERR_QLEN, + TCF_META_ID_SK_FORWARD_ALLOCS, + TCF_META_ID_SK_SNDBUF, + TCF_META_ID_SK_ALLOCS, + TCF_META_ID_SK_ROUTE_CAPS, + TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_LINGERTIME, + TCF_META_ID_SK_ACK_BACKLOG, + TCF_META_ID_SK_MAX_ACK_BACKLOG, + TCF_META_ID_SK_PRIO, + TCF_META_ID_SK_RCVLOWAT, + TCF_META_ID_SK_RCVTIMEO, + TCF_META_ID_SK_SNDTIMEO, + TCF_META_ID_SK_SENDMSG_OFF, + TCF_META_ID_SK_WRITE_PENDING, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) -- cgit v1.2.1 From a58e76f25432dc5e3e84d04c27bec03347ca365b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jun 2005 10:56:26 +0200 Subject: [PATCH] Remove obsolete HAVE_ARCH_GET_SIGNAL_TO_DELIVER? Now m68k no longer sets HAVE_ARCH_GET_SIGNAL_TO_DELIVER, can it be removed completely? Or may ARM26 still need it? Note that its usage was removed from kernel/signal.c about 2 months ago. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 0a98f5ec5cae..7be18b5e2fb4 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -231,10 +231,8 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); -#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); -#endif #endif /* __KERNEL__ */ -- cgit v1.2.1 From 03722adce90a248d0bea77d390decbd05991e2d2 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Mon, 13 Jun 2005 13:57:10 -0700 Subject: [NET]: linux/if_tr.h needs asm/byteorder.h uses __be16, but does not directly include . Add this in, so that dhcp/net-tools token ring code can compile again. Signed-off-by: Tom Rini Signed-off-by: David S. Miller --- include/linux/if_tr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 6688b414c529..3fba9e2f5427 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -19,6 +19,8 @@ #ifndef _LINUX_IF_TR_H #define _LINUX_IF_TR_H +#include /* For __be16 */ + /* IEEE 802.5 Token-Ring magic constants. The frame sizes omit the preamble and FCS/CRC (frame check sequence). */ #define TR_ALEN 6 /* Octets in one token-ring addr */ -- cgit v1.2.1 From 1c2fb7f93cb20621772bf304f3dba0849942e5db Mon Sep 17 00:00:00 2001 From: "J. Simonetti" Date: Mon, 13 Jun 2005 15:19:03 -0700 Subject: [IPV4]: Sysctl configurable icmp error source address. This patch alows you to change the source address of icmp error messages. It applies cleanly to 2.6.11.11 and retains the default behaviour. In the old (default) behaviour icmp error messages are sent with the ip of the exiting interface. The new behaviour (when the sysctl variable is toggled on), it will send the message with the ip of the interface that received the packet that caused the icmp error. This is the behaviour network administrators will expect from a router. It makes debugging complicated network layouts much easier. Also, all 'vendor routers' I know of have the later behaviour. Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 23032d9d6071..a17745c80a91 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -346,6 +346,7 @@ enum NET_TCP_MODERATE_RCVBUF=106, NET_TCP_TSO_WIN_DIVISOR=107, NET_TCP_BIC_BETA=108, + NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, }; enum { -- cgit v1.2.1 From b8112df71cae7d6a86158caeb19d215f56c4f9ab Mon Sep 17 00:00:00 2001 From: Lee Revell Date: Wed, 15 Jun 2005 14:19:03 -0400 Subject: [SCSI] Add DMA mask constants other than 32 and 64 bit Signed-Off-By: Lee Revell Signed-off-by: James Bottomley --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 806c305332c1..2d80cc761a15 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -14,7 +14,12 @@ enum dma_data_direction { }; #define DMA_64BIT_MASK 0xffffffffffffffffULL +#define DMA_40BIT_MASK 0x000000ffffffffffULL +#define DMA_39BIT_MASK 0x0000007fffffffffULL #define DMA_32BIT_MASK 0x00000000ffffffffULL +#define DMA_31BIT_MASK 0x000000007fffffffULL +#define DMA_30BIT_MASK 0x000000003fffffffULL +#define DMA_29BIT_MASK 0x000000001fffffffULL #include -- cgit v1.2.1 From 26b15dad9f1c19d6d4f7b999b07eaa6d98e4b375 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 18 Jun 2005 22:42:13 -0700 Subject: [IPSEC] Add complete xfrm event notification Heres the final patch. What this patch provides - netlink xfrm events - ability to have events generated by netlink propagated to pfkey and vice versa. - fixes the acquire lets-be-happy-with-one-success issue Signed-off-by: Jamal Hadi Salim Signed-off-by: Herbert Xu --- include/linux/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fd2ef742a9fd..03bc600516ea 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -257,5 +257,7 @@ struct xfrm_usersa_flush { #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 +#define XFRMGRP_SA 4 +#define XFRMGRP_POLICY 8 #endif /* _LINUX_XFRM_H */ -- cgit v1.2.1 From 1944972d3bb651474a5021c9da8d0166ae19f1eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:46:19 -0700 Subject: [SLAB] Introduce kmem_cache_name This is for use with slab users that pass a dynamically allocated slab name in kmem_cache_create, so that before destroying the slab one can retrieve the name and free its memory. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/slab.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7d66385ae750..76cf7e60216c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -64,6 +64,7 @@ extern int kmem_cache_shrink(kmem_cache_t *); extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); +extern const char *kmem_cache_name(kmem_cache_t *); extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags); /* Size description struct for general caches. */ -- cgit v1.2.1 From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:46:52 -0700 Subject: [NET] Generalise TCP's struct open_request minisock infrastructure Kept this first changeset minimal, without changing existing names to ease peer review. Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn has two new members: ->slab, that replaces tcp_openreq_cachep ->obj_size, to inform the size of the openreq descendant for a specific protocol The protocol specific fields in struct open_request were moved to a class hierarchy, with the things that are common to all connection oriented PF_INET protocols in struct inet_request_sock, the TCP ones in tcp_request_sock, that is an inet_request_sock, that is an open_request. I.e. this uses the same approach used for the struct sock class hierarchy, with sk_prot indicating if the protocol wants to use the open_request infrastructure by filling in sk_prot->rsk_prot with an or_calltable. Results? Performance is improved and TCP v4 now uses only 64 bytes per open request minisock, down from 96 without this patch :-) Next changeset will rename some of the structs, fields and functions mentioned above, struct or_calltable is way unclear, better name it struct request_sock_ops, s/struct open_request/struct request_sock/g, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 21 +++++++++++++++++++++ include/linux/ipv6.h | 13 +++++++++++++ include/linux/tcp.h | 11 +++++++++++ 3 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 8438c68591f9..d5b7c907204e 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -81,6 +81,7 @@ #ifdef __KERNEL__ #include #include +#include #include #include #include @@ -107,6 +108,26 @@ struct ip_options { #define optlength(opt) (sizeof(struct ip_options) + opt->optlen) +struct inet_request_sock { + struct open_request req; + u32 loc_addr; + u32 rmt_addr; + u16 rmt_port; + u16 snd_wscale : 4, + rcv_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1, + ecn_ok : 1, + acked : 1; + struct ip_options *opt; +}; + +static inline struct inet_request_sock *inet_rsk(const struct open_request *sk) +{ + return (struct inet_request_sock *)sk; +} + struct ipv6_pinfo; struct inet_sock { diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ab0d0efbf240..98acdbf3d446 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -193,6 +193,19 @@ struct inet6_skb_parm { #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +struct tcp6_request_sock { + struct tcp_request_sock req; + struct in6_addr loc_addr; + struct in6_addr rmt_addr; + struct sk_buff *pktopts; + int iif; +}; + +static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk) +{ + return (struct tcp6_request_sock *)sk; +} + /** * struct ipv6_pinfo - ipv6 private area * diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 14a55e3e3a50..86771b37b80d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -230,6 +230,17 @@ struct tcp_options_received { __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ }; +struct tcp_request_sock { + struct inet_request_sock req; + __u32 rcv_isn; + __u32 snt_isn; +}; + +static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req) +{ + return (struct tcp_request_sock *)req; +} + struct tcp_sock { /* inet_sock has to be the first member of tcp_sock */ struct inet_sock inet; -- cgit v1.2.1 From 60236fdd08b2169045a3bbfc5ffe1576e6c3c17b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:21 -0700 Subject: [NET] Rename open_request to request_sock Ok, this one just renames some stuff to have a better namespace and to dissassociate it from TCP: struct open_request -> struct request_sock tcp_openreq_alloc -> reqsk_alloc tcp_openreq_free -> reqsk_free tcp_openreq_fastfree -> __reqsk_free With this most of the infrastructure closely resembles a struct sock methods subset. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 4 ++-- include/linux/ipv6.h | 2 +- include/linux/tcp.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index d5b7c907204e..31e7cedd9f84 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -109,7 +109,7 @@ struct ip_options { #define optlength(opt) (sizeof(struct ip_options) + opt->optlen) struct inet_request_sock { - struct open_request req; + struct request_sock req; u32 loc_addr; u32 rmt_addr; u16 rmt_port; @@ -123,7 +123,7 @@ struct inet_request_sock { struct ip_options *opt; }; -static inline struct inet_request_sock *inet_rsk(const struct open_request *sk) +static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) { return (struct inet_request_sock *)sk; } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 98acdbf3d446..6fcd6a0ade24 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -201,7 +201,7 @@ struct tcp6_request_sock { int iif; }; -static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk) +static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk) { return (struct tcp6_request_sock *)sk; } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 86771b37b80d..fb54292a15aa 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -236,7 +236,7 @@ struct tcp_request_sock { __u32 snt_isn; }; -static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req) +static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) { return (struct tcp_request_sock *)req; } @@ -393,8 +393,8 @@ struct tcp_sock { struct tcp_listen_opt *listen_opt; /* FIFO of established children */ - struct open_request *accept_queue; - struct open_request *accept_queue_tail; + struct request_sock *accept_queue; + struct request_sock *accept_queue_tail; unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ -- cgit v1.2.1 From 0e87506fcc734647c7b2497eee4eb81e785c857a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:59 -0700 Subject: [NET] Generalise tcp_listen_opt This chunks out the accept_queue and tcp_listen_opt code and moves them to net/core/request_sock.c and include/net/request_sock.h, to make it useful for other transport protocols, DCCP being the first one to use it. Next patches will rename tcp_listen_opt to accept_sock and remove the inline tcp functions that just call a reqsk_queue_ function. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fb54292a15aa..97a7c9e03df5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -379,22 +379,7 @@ struct tcp_sock { __u32 total_retrans; /* Total retransmits for entire connection */ - /* The syn_wait_lock is necessary only to avoid proc interface having - * to grab the main lock sock while browsing the listening hash - * (otherwise it's deadlock prone). - * This lock is acquired in read mode only from listening_get_next() - * and it's acquired in write mode _only_ from code that is actively - * changing the syn_wait_queue. All readers that are holding - * the master sock lock don't need to grab this lock in read mode - * too as the syn_wait_queue writes are always protected from - * the main sock lock. - */ - rwlock_t syn_wait_lock; - struct tcp_listen_opt *listen_opt; - - /* FIFO of established children */ - struct request_sock *accept_queue; - struct request_sock *accept_queue_tail; + struct request_sock_queue accept_queue; /* FIFO of established children */ unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ -- cgit v1.2.1 From f88a10d65620d97b6d0a7e352a3493c1b7e7409b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:50:12 -0700 Subject: [NETLINK]: New message building macros NLMSG_PUT_ANSWER(skb, nlcb, type, length) Start a new netlink message as answer to a request, returns the message header. NLMSG_END(skb, nlh) End a netlink message, fixes total message length, returns skb->len. NLMSG_CANCEL(skb, nlh) Cancel the building process and trim whole message from skb again, returns -1. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b2738ac8bc99..8d1cb419a930 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -171,8 +171,21 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len) } #define NLMSG_PUT(skb, pid, seq, type, len) \ -({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) goto nlmsg_failure; \ - __nlmsg_put(skb, pid, seq, type, len); }) +({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \ + goto nlmsg_failure; \ + __nlmsg_put(skb, pid, seq, type, len); }) + +#define NLMSG_PUT_ANSWER(skb, cb, type, len) \ + NLMSG_PUT(skb, NETLINK_CB((cb)->skb).pid, \ + (cb)->nlh->nlmsg_seq, type, len) + +#define NLMSG_END(skb, nlh) \ +({ (nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \ + (skb)->len; }) + +#define NLMSG_CANCEL(skb, nlh) \ +({ skb_trim(skb, (unsigned char *) (nlh) - (skb)->data); \ + -1; }) extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.1 From 00768244923f66801958a8d2d103f7b65608c9b6 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:50:38 -0700 Subject: [NETLINK] Routing attribute related shortcuts RTA_GET_U(32|64)(tlv) Assumes TLV is a u32/u64 field and returns its value. RTA_GET_[M]SECS(tlv) Assumes TLV is a u64 and transports jiffies converted to seconds or milliseconds and returns its value. RTA_PUT_U(32|64)(skb, type, value) Appends %value as fixed u32/u64 to %skb as TLV %type. RTA_PUT_[M]SECS(skb, type, jiffies) Converts %jiffies to secs/msecs and appends it as u64 to %skb as TLV %type. RTA_PUT_STRING(skb, type, string) Appends %NUL terminated %string to %skb as TLV %type. RTA_NEST(skb, type) Starts a nested TLV %type and returns the nesting handle. RTA_NEST_END(skb, nesting_handle) Finishes the nested TLV %nesting_handle, must be called symmetric to RTA_NEST(). Returns skb->len RTA_NEST_CANCEL(skb, nesting_handle) Cancel the nested TLV %nesting_handle and trim nested TLV from skb again, returns -1. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 91ac97c20777..a09b5d42babf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -789,6 +789,51 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi ({ if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \ goto rtattr_failure; \ memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); }) + +#define RTA_PUT_U32(skb, attrtype, value) \ +({ u32 _tmp = (value); \ + RTA_PUT(skb, attrtype, sizeof(u32), &_tmp); }) + +#define RTA_PUT_U64(skb, attrtype, value) \ +({ u64 _tmp = (value); \ + RTA_PUT(skb, attrtype, sizeof(u64), &_tmp); }) + +#define RTA_PUT_SECS(skb, attrtype, value) \ + RTA_PUT_U64(skb, attrtype, (value) / HZ) + +#define RTA_PUT_MSECS(skb, attrtype, value) \ + RTA_PUT_U64(skb, attrtype, jiffies_to_msecs(value)) + +#define RTA_PUT_STRING(skb, attrtype, value) \ + RTA_PUT(skb, attrtype, strlen(value) + 1, value) + +#define RTA_NEST(skb, type) \ +({ struct rtattr *__start = (struct rtattr *) (skb)->tail; \ + RTA_PUT(skb, type, 0, NULL); \ + __start; }) + +#define RTA_NEST_END(skb, start) \ +({ (start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \ + (skb)->len; }) + +#define RTA_NEST_CANCEL(skb, start) \ +({ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ + -1; }) + +#define RTA_GET_U32(rta) \ +({ if (!rta || RTA_PAYLOAD(rta) < sizeof(u32)) \ + goto rtattr_failure; \ + *(u32 *) RTA_DATA(rta); }) + +#define RTA_GET_U64(rta) \ +({ u64 _tmp; \ + if (!rta || RTA_PAYLOAD(rta) < sizeof(u64)) \ + goto rtattr_failure; \ + memcpy(&_tmp, RTA_DATA(rta), sizeof(_tmp)); \ + _tmp; }) + +#define RTA_GET_SECS(rta) ((unsigned long) RTA_GET_U64(rta) * HZ) +#define RTA_GET_MSECS(rta) (msecs_to_jiffies((unsigned long) RTA_GET_U64(rta))) static inline struct rtattr * __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen) -- cgit v1.2.1 From c7fb64db001f83ece669c76a02d8ec2fdb1dd307 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:50:55 -0700 Subject: [NETLINK]: Neighbour table configuration and statistics via rtnetlink To retrieve the neighbour tables send RTM_GETNEIGHTBL with the NLM_F_DUMP flag set. Every neighbour table configuration is spread over multiple messages to avoid running into message size limits on systems with many interfaces. The first message in the sequence transports all not device specific data such as statistics, configuration, and the default parameter set. This message is followed by 0..n messages carrying device specific parameter sets. Although the ordering should be sufficient, NDTA_NAME can be used to identify sequences. The initial message can be identified by checking for NDTA_CONFIG. The device specific messages do not contain this TLV but have NDTPA_IFINDEX set to the corresponding interface index. To change neighbour table attributes, send RTM_SETNEIGHTBL with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked otherwise. Device specific parameter sets can be changed by setting NDTPA_IFINDEX to the interface index of the corresponding device. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a09b5d42babf..5a5cda160267 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,6 +89,13 @@ enum { RTM_GETANYCAST = 62, #define RTM_GETANYCAST RTM_GETANYCAST + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -493,6 +500,106 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + +#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ + NLMSG_ALIGN(sizeof(struct ndtmsg)))) +#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) + + /**** * General form of address family dependent message. ****/ -- cgit v1.2.1 From c52a3f89f882b84fc422000655c023fe73e701cf Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:51:26 -0700 Subject: [NETLINK]: Fix RTA_NEST_CANCEL(). Only skb_trim() if 'start' is non-NULL. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5a5cda160267..6fff8c4c99c7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -924,7 +924,8 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi (skb)->len; }) #define RTA_NEST_CANCEL(skb, start) \ -({ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ +({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ -1; }) #define RTA_GET_U32(rta) \ -- cgit v1.2.1 From 8f48bcd4ef11a69add178fc3111a77e7ee95bacd Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:52:36 -0700 Subject: [RTNETLINK]: Add RTA_(PUT|GET) shortcuts for u8, u16, and flag Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6fff8c4c99c7..e68dbf0bf579 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -897,6 +897,14 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi goto rtattr_failure; \ memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); }) +#define RTA_PUT_U8(skb, attrtype, value) \ +({ u8 _tmp = (value); \ + RTA_PUT(skb, attrtype, sizeof(u8), &_tmp); }) + +#define RTA_PUT_U16(skb, attrtype, value) \ +({ u16 _tmp = (value); \ + RTA_PUT(skb, attrtype, sizeof(u16), &_tmp); }) + #define RTA_PUT_U32(skb, attrtype, value) \ ({ u32 _tmp = (value); \ RTA_PUT(skb, attrtype, sizeof(u32), &_tmp); }) @@ -914,6 +922,9 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi #define RTA_PUT_STRING(skb, attrtype, value) \ RTA_PUT(skb, attrtype, strlen(value) + 1, value) +#define RTA_PUT_FLAG(skb, attrtype) \ + RTA_PUT(skb, attrtype, 0, NULL); + #define RTA_NEST(skb, type) \ ({ struct rtattr *__start = (struct rtattr *) (skb)->tail; \ RTA_PUT(skb, type, 0, NULL); \ @@ -928,6 +939,16 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ -1; }) +#define RTA_GET_U8(rta) \ +({ if (!rta || RTA_PAYLOAD(rta) < sizeof(u8)) \ + goto rtattr_failure; \ + *(u8 *) RTA_DATA(rta); }) + +#define RTA_GET_U16(rta) \ +({ if (!rta || RTA_PAYLOAD(rta) < sizeof(u16)) \ + goto rtattr_failure; \ + *(u16 *) RTA_DATA(rta); }) + #define RTA_GET_U32(rta) \ ({ if (!rta || RTA_PAYLOAD(rta) < sizeof(u32)) \ goto rtattr_failure; \ @@ -940,6 +961,8 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi memcpy(&_tmp, RTA_DATA(rta), sizeof(_tmp)); \ _tmp; }) +#define RTA_GET_FLAG(rta) (!!(rta)) + #define RTA_GET_SECS(rta) ((unsigned long) RTA_GET_U64(rta) * HZ) #define RTA_GET_MSECS(rta) (msecs_to_jiffies((unsigned long) RTA_GET_U64(rta))) -- cgit v1.2.1 From 1797754ea7ee5e0d859b0a32506ff999f8d5fb71 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:53:48 -0700 Subject: [NETLINK]: Introduce NLMSG_NEW macro to better handle netlink flags Introduces a new macro NLMSG_NEW which extends NLMSG_PUT but takes a flags argument. NLMSG_PUT stays there for compatibility but now calls NLMSG_NEW with flags == 0. NLMSG_PUT_ANSWER is renamed to NLMSG_NEW_ANSWER which now also takes a flags argument. Also converts the users of NLMSG_PUT_ANSWER to use NLMSG_NEW_ANSWER and fixes the two direct users of __nlmsg_put to either provide the flags or use NLMSG_NEW(_ANSWER). Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 8d1cb419a930..e38407a23d04 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -156,7 +156,7 @@ struct netlink_notify }; static __inline__ struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len) +__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -164,20 +164,23 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len) nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; nlh->nlmsg_len = size; - nlh->nlmsg_flags = 0; + nlh->nlmsg_flags = flags; nlh->nlmsg_pid = pid; nlh->nlmsg_seq = seq; return nlh; } -#define NLMSG_PUT(skb, pid, seq, type, len) \ +#define NLMSG_NEW(skb, pid, seq, type, len, flags) \ ({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \ goto nlmsg_failure; \ - __nlmsg_put(skb, pid, seq, type, len); }) + __nlmsg_put(skb, pid, seq, type, len, flags); }) + +#define NLMSG_PUT(skb, pid, seq, type, len) \ + NLMSG_NEW(skb, pid, seq, type, len, 0) -#define NLMSG_PUT_ANSWER(skb, cb, type, len) \ - NLMSG_PUT(skb, NETLINK_CB((cb)->skb).pid, \ - (cb)->nlh->nlmsg_seq, type, len) +#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \ + NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \ + (cb)->nlh->nlmsg_seq, type, len, flags) #define NLMSG_END(skb, nlh) \ ({ (nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \ -- cgit v1.2.1 From 0603eac0d6b77acac5924a2734228cbaf072f993 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:54:36 -0700 Subject: [IPSEC]: Add XFRMA_SA/XFRMA_POLICY for delete notification This patch changes the format of the XFRM_MSG_DELSA and XFRM_MSG_DELPOLICY notification so that the main message sent is of the same format as that received by the kernel if the original message was via netlink. This also means that we won't lose the byid information carried in km_event. Since this user interface is introduced by Jamal's patch we can still afford to change it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 03bc600516ea..d68391a9b9f3 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -174,6 +174,8 @@ enum xfrm_attr_type_t { XFRMA_ALG_COMP, /* struct xfrm_algo */ XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */ XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA, + XFRMA_POLICY, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.1 From dd87147eed934eaff92869f3d158697c7239d1d2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:21:43 -0700 Subject: [IPSEC]: Add XFRM_STATE_NOPMTUDISC flag This patch adds the flag XFRM_STATE_NOPMTUDISC for xfrm states. It is similar to the nopmtudisc on IPIP/GRE tunnels. It only has an effect on IPv4 tunnel mode states. For these states, it will ensure that the DF flag is always cleared. This is primarily useful to work around ICMP blackholes. In future this flag could also allow a larger MTU to be set within the tunnel just like IPIP/GRE tunnels. This could be useful for short haul tunnels where temporary fragmentation outside the tunnel is desired over smaller fragments inside the tunnel. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 1 + include/linux/xfrm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index e6b519220245..724066778aff 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -245,6 +245,7 @@ struct sadb_x_nat_t_port { /* Security Association flags */ #define SADB_SAFLAGS_PFS 1 +#define SADB_SAFLAGS_NOPMTUDISC 0x20000000 #define SADB_SAFLAGS_DECAP_DSCP 0x40000000 #define SADB_SAFLAGS_NOECN 0x80000000 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index d68391a9b9f3..f0d423300d84 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -196,6 +196,7 @@ struct xfrm_usersa_info { __u8 flags; #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 +#define XFRM_STATE_NOPMTUDISC 4 }; struct xfrm_usersa_id { -- cgit v1.2.1 From f6e276ee67c0ac9efafd24bc6f7a84aa359656df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 20 Jun 2005 13:32:05 -0700 Subject: [ATALK]: endian annotations Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/atalk.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 31d3fc25ccbd..09a1451c1159 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -20,7 +20,7 @@ #define SIOCATALKDIFADDR (SIOCPROTOPRIVATE + 0) struct atalk_addr { - __u16 s_net; + __be16 s_net; __u8 s_node; }; @@ -33,8 +33,8 @@ struct sockaddr_at { struct atalk_netrange { __u8 nr_phase; - __u16 nr_firstnet; - __u16 nr_lastnet; + __be16 nr_firstnet; + __be16 nr_lastnet; }; #ifdef __KERNEL__ @@ -70,8 +70,8 @@ struct atalk_iface { struct atalk_sock { /* struct sock has to be the first member of atalk_sock */ struct sock sk; - unsigned short dest_net; - unsigned short src_net; + __be16 dest_net; + __be16 src_net; unsigned char dest_node; unsigned char src_node; unsigned char dest_port; @@ -95,9 +95,9 @@ struct ddpehdr { deh_hops:4, deh_len:10; #endif - __u16 deh_sum; - __u16 deh_dnet; - __u16 deh_snet; + __be16 deh_sum; + __be16 deh_dnet; + __be16 deh_snet; __u8 deh_dnode; __u8 deh_snode; __u8 deh_dport; @@ -142,24 +142,24 @@ struct ddpshdr { /* AppleTalk AARP headers */ struct elapaarp { - __u16 hw_type; + __be16 hw_type; #define AARP_HW_TYPE_ETHERNET 1 #define AARP_HW_TYPE_TOKENRING 2 - __u16 pa_type; + __be16 pa_type; __u8 hw_len; __u8 pa_len; #define AARP_PA_ALEN 4 - __u16 function; + __be16 function; #define AARP_REQUEST 1 #define AARP_REPLY 2 #define AARP_PROBE 3 __u8 hw_src[ETH_ALEN] __attribute__ ((packed)); __u8 pa_src_zero __attribute__ ((packed)); - __u16 pa_src_net __attribute__ ((packed)); + __be16 pa_src_net __attribute__ ((packed)); __u8 pa_src_node __attribute__ ((packed)); __u8 hw_dst[ETH_ALEN] __attribute__ ((packed)); __u8 pa_dst_zero __attribute__ ((packed)); - __u16 pa_dst_net __attribute__ ((packed)); + __be16 pa_dst_net __attribute__ ((packed)); __u8 pa_dst_node __attribute__ ((packed)); }; -- cgit v1.2.1 From 246955fe4c38bd706ae30e37c64892c94213775d Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Mon, 20 Jun 2005 13:36:39 -0700 Subject: [NETLINK]: fib_lookup() via netlink Below is a more generic patch to do fib_lookup via netlink. For others we should say that we discussed this as a way to verify route selection. It's also possible there are others uses for this. In short the fist half of struct fib_result_nl is filled in by caller and netlink call fills in the other half and returns it. In case anyone is interested there is a corresponding user app to compare the full routing table this was used to test implementation of the LC-trie. Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e38407a23d04..561d4dc75836 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -14,6 +14,7 @@ #define NETLINK_SELINUX 7 /* SELinux event notifications */ #define NETLINK_ARPD 8 #define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 #define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ -- cgit v1.2.1 From e3a15db2415579d5136b9ba9b52fe27c66da8780 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 26 Apr 2005 02:31:08 -0500 Subject: [PATCH] sysfs_{create|remove}_link should take const char * sysfs: make sysfs_{create|remove}_link to take const char * name. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 38b58b30814a..931b5aaaf380 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -105,11 +105,11 @@ sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode); extern void sysfs_remove_file(struct kobject *, const struct attribute *); -extern int -sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name); +extern int +sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name); extern void -sysfs_remove_link(struct kobject *, char * name); +sysfs_remove_link(struct kobject *, const char * name); int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr); @@ -153,12 +153,12 @@ static inline void sysfs_remove_file(struct kobject * k, const struct attribute ; } -static inline int sysfs_create_link(struct kobject * k, struct kobject * t, char * n) +static inline int sysfs_create_link(struct kobject * k, struct kobject * t, const char * n) { return 0; } -static inline void sysfs_remove_link(struct kobject * k, char * name) +static inline void sysfs_remove_link(struct kobject * k, const char * name) { ; } -- cgit v1.2.1 From f3b4f3c6dec04c6c8261fe22645f07b39976595a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 26 Apr 2005 02:32:00 -0500 Subject: [PATCH] Make kobject's name be const char * kobject: make kobject's name const char * since users should not attempt to change it (except by calling kobject_rename). Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 765d660d3bea..76dc67245c0c 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -33,7 +33,7 @@ extern u64 hotplug_seqnum; struct kobject { - char * k_name; + const char * k_name; char name[KOBJ_NAME_LEN]; struct kref kref; struct list_head entry; @@ -46,7 +46,7 @@ struct kobject { extern int kobject_set_name(struct kobject *, const char *, ...) __attribute__((format(printf,2,3))); -static inline char * kobject_name(struct kobject * kobj) +static inline const char * kobject_name(const struct kobject * kobj) { return kobj->k_name; } @@ -57,7 +57,7 @@ extern void kobject_cleanup(struct kobject *); extern int kobject_add(struct kobject *); extern void kobject_del(struct kobject *); -extern int kobject_rename(struct kobject *, char *new_name); +extern int kobject_rename(struct kobject *, const char *new_name); extern int kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); -- cgit v1.2.1 From 419cab3fc69588ebe35b845cc3a584ae172463de Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 26 Apr 2005 02:32:54 -0500 Subject: [PATCH] kset_hotplug_ops->name shoudl return const char * kobject: change name() method in kset_hotplug_ops return const char * since users shoudl not try to modify returned data. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 76dc67245c0c..3b22304f12fd 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -94,7 +94,7 @@ struct kobj_type { */ struct kset_hotplug_ops { int (*filter)(struct kset *kset, struct kobject *kobj); - char *(*name)(struct kset *kset, struct kobject *kobj); + const char *(*name)(struct kset *kset, struct kobject *kobj); int (*hotplug)(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size); }; -- cgit v1.2.1 From 8d790d74085833ba2a3e84b5bcd683be4981c29a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 26 Apr 2005 02:34:05 -0500 Subject: [PATCH] make driver's name be const char * Driver core: change driver's, bus's, class's and platform device's names to be const char * so one can use const char *drv_name = "asdfg"; when initializing structures. Also kill couple of whitespaces. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index df94c0de53f2..fa9e6ca08f5a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,7 +47,7 @@ struct class_device; struct class_simple; struct bus_type { - char * name; + const char * name; struct subsystem subsys; struct kset drivers; @@ -98,17 +98,17 @@ extern int bus_create_file(struct bus_type *, struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); struct device_driver { - char * name; + const char * name; struct bus_type * bus; struct completion unloaded; struct kobject kobj; struct list_head devices; - struct module * owner; + struct module * owner; int (*probe) (struct device * dev); - int (*remove) (struct device * dev); + int (*remove) (struct device * dev); void (*shutdown) (struct device * dev); int (*suspend) (struct device * dev, pm_message_t state, u32 level); int (*resume) (struct device * dev, u32 level); @@ -142,7 +142,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute * * device classes */ struct class { - char * name; + const char * name; struct subsystem subsys; struct list_head children; @@ -366,7 +366,7 @@ extern struct device *device_find(const char *name, struct bus_type *bus); /* drivers/base/platform.c */ struct platform_device { - char * name; + const char * name; u32 id; struct device dev; u32 num_resources; -- cgit v1.2.1 From d48593bf208e0d046c35fb0707ae5b23fef8c4ff Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 29 Apr 2005 00:58:46 -0500 Subject: [PATCH] Make attributes names const char * sysfs: make attributes and attribute_group's names const char * Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 931b5aaaf380..d9cd2d31d377 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -16,13 +16,13 @@ struct kobject; struct module; struct attribute { - char * name; + const char * name; struct module * owner; mode_t mode; }; struct attribute_group { - char * name; + const char * name; struct attribute ** attrs; }; -- cgit v1.2.1 From e9ba6365fd4f0d9e7d022c883bd044fbaa48257f Mon Sep 17 00:00:00 2001 From: "gregkh@suse.de" Date: Tue, 15 Mar 2005 11:54:21 -0800 Subject: [PATCH] CLASS: move a "simple" class logic into the class core. One step on improving the class api so that it can not be used incorrectly. This also fixes the module owner issue with the dev files that happened when the devt logic moved to the class core. Based on a patch originally written by Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index fa9e6ca08f5a..73250d01c01f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -143,6 +143,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute * */ struct class { const char * name; + struct module * owner; struct subsystem subsys; struct list_head children; @@ -185,6 +186,7 @@ struct class_device { struct kobject kobj; struct class * class; /* required */ dev_t devt; /* dev_t, creates the sysfs "dev" */ + struct class_device_attribute *devt_attr; struct device * dev; /* not necessary, but nice to have */ void * class_data; /* class-specific data */ @@ -245,6 +247,13 @@ struct class_interface { extern int class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); +extern struct class *class_create(struct module *owner, char *name); +extern void class_destroy(struct class *cls); +extern struct class_device *class_device_create(struct class *cls, dev_t devt, + struct device *device, char *fmt, ...) + __attribute__((format(printf,4,5))); +extern void class_device_destroy(struct class *cls, dev_t devt); + /* interface for class simple stuff */ extern struct class_simple *class_simple_create(struct module *owner, char *name); extern void class_simple_destroy(struct class_simple *cs); -- cgit v1.2.1 From 1235686f6e67cf30c460eb77d90a6cb4be57b92f Mon Sep 17 00:00:00 2001 From: "gregkh@suse.de" Date: Tue, 15 Mar 2005 14:26:30 -0800 Subject: [PATCH] INPUT: move to use the new class code, instead of class_simple Signed-off-by: Greg Kroah-Hartman --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 72731d7d189e..9d9598ed760d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1015,7 +1015,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min dev->absbit[LONG(axis)] |= BIT(axis); } -extern struct class_simple *input_class; +extern struct class *input_class; #endif #endif -- cgit v1.2.1 From 8561b10f6e7ef0a085709ffc844f74130a067abe Mon Sep 17 00:00:00 2001 From: "gregkh@suse.de" Date: Tue, 15 Mar 2005 15:10:13 -0800 Subject: [PATCH] USB: move the usb hcd code to use the new class code. This moves a kref into the main hcd structure, which detaches it from the class device structure. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 2d1ac5058534..3d508bf08402 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -287,15 +287,14 @@ struct usb_bus { struct dentry *usbfs_dentry; /* usbfs dentry entry for the bus */ - struct class_device class_dev; /* class device for this bus */ + struct class_device *class_dev; /* class device for this bus */ + struct kref kref; /* handles reference counting this bus */ void (*release)(struct usb_bus *bus); /* function to destroy this bus's memory */ #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ #endif }; -#define to_usb_bus(d) container_of(d, struct usb_bus, class_dev) - /* -------------------------------------------------------------------------- */ -- cgit v1.2.1 From cd987d38cc59053e0bab8150ffaca33b109067f3 Mon Sep 17 00:00:00 2001 From: "gregkh@suse.de" Date: Wed, 23 Mar 2005 11:12:38 -0800 Subject: [PATCH] class: remove class_simple code, as no one in the tree is using it anymore. Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 73250d01c01f..68a95358013d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -44,7 +44,6 @@ struct device; struct device_driver; struct class; struct class_device; -struct class_simple; struct bus_type { const char * name; @@ -254,15 +253,6 @@ extern struct class_device *class_device_create(struct class *cls, dev_t devt, __attribute__((format(printf,4,5))); extern void class_device_destroy(struct class *cls, dev_t devt); -/* interface for class simple stuff */ -extern struct class_simple *class_simple_create(struct module *owner, char *name); -extern void class_simple_destroy(struct class_simple *cs); -extern struct class_device *class_simple_device_add(struct class_simple *cs, dev_t dev, struct device *device, const char *fmt, ...) - __attribute__((format(printf,4,5))); -extern int class_simple_set_hotplug(struct class_simple *, - int (*hotplug)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size)); -extern void class_simple_device_remove(dev_t dev); - struct device { struct list_head node; /* node in sibling list */ -- cgit v1.2.1 From af70316af182f4716cc5eec7e0d27fc731d164bd Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 10:41:04 -0800 Subject: [PATCH] Add a semaphore to struct device to synchronize calls to its driver. This adds a per-device semaphore that is taken before every call from the core to a driver method. This prevents e.g. simultaneous calls to the ->suspend() or ->resume() and ->probe() or ->release(), potentially saving a whole lot of headaches. It also moves us a step closer to removing the bus rwsem, since it protects the fields in struct device that are modified by the core. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 68a95358013d..195b327f3e19 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -264,6 +264,10 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + struct semaphore sem; /* semaphore to synchronize calls to + * its driver. + */ + struct bus_type * bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ -- cgit v1.2.1 From fae3cd00255e3e51ffd59fedb1bdb91ec96be395 Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 10:59:56 -0800 Subject: [PATCH] Add driver_for_each_device(). Now there's an iterator for accessing each device bound to a driver. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman Index: linux-2.6.12-rc2/drivers/base/driver.c =================================================================== --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 195b327f3e19..10f5aa20e041 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -136,6 +136,9 @@ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store) extern int driver_create_file(struct device_driver *, struct driver_attribute *); extern void driver_remove_file(struct device_driver *, struct driver_attribute *); +extern int driver_for_each_device(struct device_driver * drv, struct device * start, + void * data, int (*fn)(struct device *, void *)); + /* * device classes -- cgit v1.2.1 From 9a19fea43616066561e221359596ce532e631395 Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 11:45:16 -0800 Subject: [PATCH] Add initial implementation of klist helpers. This klist interface provides a couple of structures that wrap around struct list_head to provide explicit list "head" (struct klist) and list "node" (struct klist_node) objects. For struct klist, a spinlock is included that protects access to the actual list itself. struct klist_node provides a pointer to the klist that owns it and a kref reference count that indicates the number of current users of that node in the list. The entire point is to provide an interface for iterating over a list that is safe and allows for modification of the list during the iteration (e.g. insertion and removal), including modification of the current node on the list. It works using a 3rd object type - struct klist_iter - that is declared and initialized before an iteration. klist_next() is used to acquire the next element in the list. It returns NULL if there are no more items. This klist interface provides a couple of structures that wrap around struct list_head to provide explicit list "head" (struct klist) and list "node" (struct klist_node) objects. For struct klist, a spinlock is included that protects access to the actual list itself. struct klist_node provides a pointer to the klist that owns it and a kref reference count that indicates the number of current users of that node in the list. The entire point is to provide an interface for iterating over a list that is safe and allows for modification of the list during the iteration (e.g. insertion and removal), including modification of the current node on the list. It works using a 3rd object type - struct klist_iter - that is declared and initialized before an iteration. klist_next() is used to acquire the next element in the list. It returns NULL if there are no more items. Internally, that routine takes the klist's lock, decrements the reference count of the previous klist_node and increments the count of the next klist_node. It then drops the lock and returns. There are primitives for adding and removing nodes to/from a klist. When deleting, klist_del() will simply decrement the reference count. Only when the count goes to 0 is the node removed from the list. klist_remove() will try to delete the node from the list and block until it is actually removed. This is useful for objects (like devices) that have been removed from the system and must be freed (but must wait until all accessors have finished). Internally, that routine takes the klist's lock, decrements the reference count of the previous klist_node and increments the count of the next klist_node. It then drops the lock and returns. There are primitives for adding and removing nodes to/from a klist. When deleting, klist_del() will simply decrement the reference count. Only when the count goes to 0 is the node removed from the list. klist_remove() will try to delete the node from the list and block until it is actually removed. This is useful for objects (like devices) that have been removed from the system and must be freed (but must wait until all accessors have finished). Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman diff -Nru a/include/linux/klist.h b/include/linux/klist.h --- include/linux/klist.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/klist.h (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h new file mode 100644 index 000000000000..fb52f9d9d611 --- /dev/null +++ b/include/linux/klist.h @@ -0,0 +1,53 @@ +/* + * klist.h - Some generic list helpers, extending struct list_head a bit. + * + * Implementations are found in lib/klist.c + * + * + * Copyright (C) 2005 Patrick Mochel + * + * This file is rleased under the GPL v2. + */ + +#include +#include +#include +#include + + +struct klist { + spinlock_t k_lock; + struct list_head k_list; +}; + + +extern void klist_init(struct klist * k); + + +struct klist_node { + struct klist * n_klist; + struct list_head n_node; + struct kref n_ref; + struct completion n_removed; +}; + +extern void klist_add_tail(struct klist * k, struct klist_node * n); +extern void klist_add_head(struct klist * k, struct klist_node * n); + +extern void klist_del(struct klist_node * n); +extern void klist_remove(struct klist_node * n); + + +struct klist_iter { + struct klist * i_klist; + struct list_head * i_head; + struct klist_node * i_cur; +}; + + +extern void klist_iter_init(struct klist * k, struct klist_iter * i); +extern void klist_iter_init_node(struct klist * k, struct klist_iter * i, + struct klist_node * n); +extern void klist_iter_exit(struct klist_iter * i); +extern struct klist_node * klist_next(struct klist_iter * i); + -- cgit v1.2.1 From 465c7a3a3a5aabcedd2e43612cac5a12f23da19a Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 11:49:14 -0800 Subject: [PATCH] Add a klist to struct bus_type for its devices. - Use it for bus_for_each_dev(). - Use the klist spinlock instead of the bus rwsem. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 10f5aa20e041..e36953cf7f14 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ struct bus_type { struct subsystem subsys; struct kset drivers; struct kset devices; + struct klist klist_devices; struct bus_attribute * bus_attrs; struct device_attribute * dev_attrs; @@ -262,6 +264,7 @@ struct device { struct list_head bus_list; /* node in bus's list */ struct list_head driver_list; struct list_head children; + struct klist_node knode_bus; struct device * parent; struct kobject kobj; -- cgit v1.2.1 From 38fdac3cdce276554b4484a41f8ec2daf81cb2ff Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 12:00:18 -0800 Subject: [PATCH] Add a klist to struct bus_type for its drivers. - Use it in bus_for_each_drv(). - Use the klist spinlock instead of the bus rwsem. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e36953cf7f14..ea9ab33dfe71 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -53,6 +53,7 @@ struct bus_type { struct kset drivers; struct kset devices; struct klist klist_devices; + struct klist klist_drivers; struct bus_attribute * bus_attrs; struct device_attribute * dev_attrs; @@ -105,6 +106,7 @@ struct device_driver { struct completion unloaded; struct kobject kobj; struct list_head devices; + struct klist_node knode_bus; struct module * owner; -- cgit v1.2.1 From 94e7b1c5ff2055571703e38b059afffe17658432 Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Mon, 21 Mar 2005 12:25:36 -0800 Subject: [PATCH] Add a klist to struct device_driver for the devices bound to it. - Use it in driver_for_each_device() instead of the regular list_head and stop using the bus's rwsem for protection. - Use driver_for_each_device() in driver_detach() so we don't deadlock on the bus's rwsem. - Remove ->devices. - Move klist access and sysfs link access out from under device's semaphore, since they're synchronized through other means. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ea9ab33dfe71..96c71b59fdef 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -105,7 +105,7 @@ struct device_driver { struct completion unloaded; struct kobject kobj; - struct list_head devices; + struct klist klist_devices; struct klist_node knode_bus; struct module * owner; @@ -266,6 +266,7 @@ struct device { struct list_head bus_list; /* node in bus's list */ struct list_head driver_list; struct list_head children; + struct klist_node knode_driver; struct klist_node knode_bus; struct device * parent; -- cgit v1.2.1 From cb85b6f1cc811ecb9ed4b950206d8941ba710e68 Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Thu, 24 Mar 2005 10:48:35 -0800 Subject: [PATCH] Remove the unused device_find(). Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 96c71b59fdef..f1c38d869ac9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -372,7 +372,6 @@ extern int (*platform_notify_remove)(struct device * dev); */ extern struct device * get_device(struct device * dev); extern void put_device(struct device * dev); -extern struct device *device_find(const char *name, struct bus_type *bus); /* drivers/base/platform.c */ -- cgit v1.2.1 From 8b0c250be489dcbf1a3a33bb4ec4c7f33735a365 Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Thu, 24 Mar 2005 12:58:57 -0800 Subject: [PATCH] add klist_node_attached() to determine if a node is on a list or not. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman diff -Nru a/include/linux/klist.h b/include/linux/klist.h --- include/linux/klist.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index fb52f9d9d611..eebf5e5696ec 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -37,6 +37,8 @@ extern void klist_add_head(struct klist * k, struct klist_node * n); extern void klist_del(struct klist_node * n); extern void klist_remove(struct klist_node * n); +extern int klist_node_attached(struct klist_node * n); + struct klist_iter { struct klist * i_klist; -- cgit v1.2.1 From 7dc35cdf69149a7f2b5216ada9bafe359746ac1c Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Thu, 24 Mar 2005 13:03:35 -0800 Subject: [PATCH] Remove struct device::bus_list. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index f1c38d869ac9..13f65853e583 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt); struct device { struct list_head node; /* node in sibling list */ - struct list_head bus_list; /* node in bus's list */ struct list_head driver_list; struct list_head children; struct klist_node knode_driver; -- cgit v1.2.1 From 63c4f204ffc8219696bda88eb20c9873d007a2fc Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Thu, 24 Mar 2005 13:08:05 -0800 Subject: [PATCH] Remove struct device::driver_list. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 13f65853e583..d2434934d091 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt); struct device { struct list_head node; /* node in sibling list */ - struct list_head driver_list; struct list_head children; struct klist_node knode_driver; struct klist_node knode_bus; -- cgit v1.2.1 From 36239577cfb6b9a7c111209536b54200b0252ebf Mon Sep 17 00:00:00 2001 From: "mochel@digitalimplant.org" Date: Thu, 24 Mar 2005 19:08:30 -0800 Subject: [PATCH] Use a klist for device child lists. - Use klist iterator in device_for_each_child(), making it safe to use for removing devices. - Remove unused list_to_dev() function. - Kills all usage of devices_subsys.rwsem. Signed-off-by: Patrick Mochel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d2434934d091..43249260cd1c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -262,8 +262,8 @@ extern void class_device_destroy(struct class *cls, dev_t devt); struct device { - struct list_head node; /* node in sibling list */ - struct list_head children; + struct klist klist_children; + struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device * parent; @@ -298,12 +298,6 @@ struct device { void (*release)(struct device * dev); }; -static inline struct device * -list_to_dev(struct list_head *node) -{ - return list_entry(node, struct device, node); -} - static inline void * dev_get_drvdata (struct device *dev) { -- cgit v1.2.1 From 0d3e5a2e39b6ba2974e9e7c2a429018c45de8e76 Mon Sep 17 00:00:00 2001 From: Patrick Mochel Date: Tue, 5 Apr 2005 23:46:33 -0700 Subject: [PATCH] Driver Core: fix bk-driver-core kills ppc64 There's no check to see if the device is already bound to a driver, which could do bad things. The first thing to go wrong is that it will try to match a driver with a device already bound to one. In some cases (it appears with USB with drivers/usb/core/usb.c::usb_match_id()), some drivers will match a device based on the class type, so it would be common (especially for HID devices) to match a device that is already bound. The fun comes when ->probe() is called, it fails, then driver_probe_device() does this: dev->driver = NULL; Later on, that pointer could be be dereferenced without checking and cause hell to break loose. This problem could be nasty. It's very hardware dependent, since some devices could have a different set of matching qualifiers than others. Now, I don't quite see exactly where/how you were getting that crash. You're dereferencing bad memory, but I'm not sure which pointer was bad and where it came from, but it could have come from a couple of different places. The patch below will hopefully fix it all up for you. It's against 2.6.12-rc2-mm1, and does the following: - Move logic to driver_probe_device() and comments uncommon returns: 1 - If device is bound 0 - If device not bound, and no error error - If there was an error. - Move locking to caller of that function, since we want to lock a device for the entire time we're trying to bind it to a driver (to prevent against a driver being loaded at the same time). - Update __device_attach() and __driver_attach() to do that locking. - Check if device is already bound in __driver_attach() - Update the converse device_release_driver() so it locks the device around all of the operations. - Mark driver_probe_device() as static and remove export. It's an internal function, it should stay that way, and there are no other callers. If there is ever a need to export it, we can audit it as necessary. Signed-off-by: Andrew Morton --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 43249260cd1c..91aac349b9a7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -325,7 +325,6 @@ extern int device_for_each_child(struct device *, void *, * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ -extern int driver_probe_device(struct device_driver * drv, struct device * dev); extern void device_bind_driver(struct device * dev); extern void device_release_driver(struct device * dev); extern int device_attach(struct device * dev); -- cgit v1.2.1 From 4b45099b75832434c5113b9aed1499f8a69d13d5 Mon Sep 17 00:00:00 2001 From: Keiichiro Tokunaga Date: Sun, 8 May 2005 21:28:53 +0900 Subject: [PATCH] Driver core: unregister_node() for hotplug use This adds a generic function 'unregister_node()'. It is used to remove objects of a node going away for hotplug. All the devices on the node must be unregistered before calling this function. Signed-off-by: Keiichiro Tokunaga Signed-off-by: Greg Kroah-Hartman diff -puN drivers/base/node.c~numa_hp_base drivers/base/node.c --- include/linux/node.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 6e0a697e594e..254dc3de650b 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -27,6 +27,7 @@ struct node { }; extern int register_node(struct node *, int, struct node *); +extern void unregister_node(struct node *node); #define to_node(sys_device) container_of(sys_device, struct node, sysdev) -- cgit v1.2.1 From acaefc25d21f850e47ecc5098d1e0bc442c526be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 May 2005 14:40:59 +0200 Subject: [PATCH] libfs: add simple attribute files Based on the discussion about spufs attributes, this is my suggestion for a more generic attribute file support that can be used by both debugfs and spufs. Simple attribute files behave similarly to sequential files from a kernel programmers perspective in that a standard set of file operations is provided and only an open operation needs to be written that registers file specific get() and set() functions. These operations are defined as void foo_set(void *data, u64 val); and u64 foo_get(void *data); where data is the inode->u.generic_ip pointer of the file and the operations just need to make send of that pointer. The infrastructure makes sure this works correctly with concurrent access and partial read calls. A macro named DEFINE_SIMPLE_ATTRIBUTE is provided to further simplify using the attributes. This patch already contains the changes for debugfs to use attributes for its internal file operations. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0180102dace1..9b8b696d4f15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1657,6 +1657,52 @@ static inline void simple_transaction_set(struct file *file, size_t n) ar->size = n; } +/* + * simple attribute files + * + * These attributes behave similar to those in sysfs: + * + * Writing to an attribute immediately sets a value, an open file can be + * written to multiple times. + * + * Reading from an attribute creates a buffer from the value that might get + * read with multiple read calls. When the attribute has been read + * completely, no further read calls are possible until the file is opened + * again. + * + * All attributes contain a text representation of a numeric value + * that are accessed with the get() and set() functions. + */ +#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_close, \ + .read = simple_attr_read, \ + .write = simple_attr_write, \ +}; + +static inline void __attribute__((format(printf, 1, 2))) +__simple_attr_check_format(const char *fmt, ...) +{ + /* don't do anything, just let the compiler check the arguments; */ +} + +int simple_attr_open(struct inode *inode, struct file *file, + u64 (*get)(void *), void (*set)(void *, u64), + const char *fmt); +int simple_attr_close(struct inode *inode, struct file *file); +ssize_t simple_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); +ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); + + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void) { -- cgit v1.2.1 From 54b6f35c99974e99e64c05c2895718355123c55f Mon Sep 17 00:00:00 2001 From: Yani Ioannou Date: Tue, 17 May 2005 06:39:34 -0400 Subject: [PATCH] Driver core: change device_attribute callbacks This patch adds the device_attribute paramerter to the device_attribute store and show sysfs callback functions, and passes a reference to the attribute when the callbacks are called. Signed-off-by: Yani Ioannou Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 91aac349b9a7..7b781a72b293 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -335,8 +335,10 @@ extern void driver_attach(struct device_driver * drv); struct device_attribute { struct attribute attr; - ssize_t (*show)(struct device * dev, char * buf); - ssize_t (*store)(struct device * dev, const char * buf, size_t count); + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); }; #define DEVICE_ATTR(_name,_mode,_show,_store) \ -- cgit v1.2.1 From 0a3e7eeabc9f76b7496488aad2d11626ff6a2a4f Mon Sep 17 00:00:00 2001 From: Yani Ioannou Date: Tue, 17 May 2005 22:59:05 -0400 Subject: [PATCH] I2C: add i2c sensor_device_attribute and macros This patch creates a new header with a potential standard i2c sensor attribute type (which simply includes an int representing the sensor number/index) and the associated macros, SENSOR_DEVICE_ATTR to define a static attribute and to_sensor_dev_attr to get a sensor_device_attribute reference from an embedded device_attribute reference. Signed-off-by: Yani Ioannou --- include/linux/i2c-sysfs.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/i2c-sysfs.h (limited to 'include/linux') diff --git a/include/linux/i2c-sysfs.h b/include/linux/i2c-sysfs.h new file mode 100644 index 000000000000..d7bf6ce11679 --- /dev/null +++ b/include/linux/i2c-sysfs.h @@ -0,0 +1,36 @@ +/* + * i2c-sysfs.h - i2c chip driver sysfs defines + * + * Copyright (C) 2005 Yani Ioannou + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _LINUX_I2C_SYSFS_H +#define _LINUX_I2C_SYSFS_H + +struct sensor_device_attribute{ + struct device_attribute dev_attr; + int index; +}; +#define to_sensor_dev_attr(_dev_attr) \ + container_of(_dev_attr, struct sensor_device_attribute, dev_attr) + +#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index) \ +struct sensor_device_attribute sensor_dev_attr_##_name = { \ + .dev_attr = __ATTR(_name,_mode,_show,_store), \ + .index = _index, \ +} + +#endif /* _LINUX_I2C_SYSFS_H */ -- cgit v1.2.1 From 988d186de5b6966a71a8cc52e6cb4895fd2f7799 Mon Sep 17 00:00:00 2001 From: Maneesh Soni Date: Tue, 31 May 2005 10:39:14 +0530 Subject: [PATCH] sysfs-iattr: add sysfs_setattr o This adds ->i_op->setattr VFS method for sysfs inodes. The changed attribues are saved in the persistent sysfs_dirent structure as a pointer to struct iattr. The struct iattr is allocated only for those sysfs_dirent's for which default attributes are getting changed. Thanks to Jon Smirl for this suggestion. Signed-off-by: Maneesh Soni Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d9cd2d31d377..392da5a6dacb 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -73,6 +73,7 @@ struct sysfs_dirent { int s_type; umode_t s_mode; struct dentry * s_dentry; + struct iattr * s_iattr; }; #define SYSFS_ROOT 0x0001 -- cgit v1.2.1 From 18b504e25fd617bee8830d2cdcaff7fb7b5931bb Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Tue, 21 Jun 2005 12:38:48 -0700 Subject: [NETLINK]: netlink_callback structure needs 5 args not 4 net/ipv4/tcp_diag.c uses up to ->args[4] Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 561d4dc75836..3029cad63a01 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -147,7 +147,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[4]; + long args[5]; }; struct netlink_notify -- cgit v1.2.1 From e45b1be8bcb3643808975a426fa3e201a2588e87 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 21 Jun 2005 14:01:30 -0700 Subject: [NETFILTER]: Kill lockhelp.h Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_core.h | 3 +- include/linux/netfilter_ipv4/ip_nat.h | 3 +- include/linux/netfilter_ipv4/listhelp.h | 1 - include/linux/netfilter_ipv4/lockhelp.h | 129 ----------------------- 4 files changed, 2 insertions(+), 134 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/lockhelp.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index d84be02cb4fc..694aec9b4784 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -1,7 +1,6 @@ #ifndef _IP_CONNTRACK_CORE_H #define _IP_CONNTRACK_CORE_H #include -#include /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use @@ -47,6 +46,6 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; -DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); +extern rwlock_t ip_conntrack_lock; #endif /* _IP_CONNTRACK_CORE_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index 2b72b86176f0..e201ec6e9905 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -50,10 +50,9 @@ struct ip_nat_multi_range_compat #ifdef __KERNEL__ #include -#include /* Protects NAT hash tables, and NAT-private part of conntracks. */ -DECLARE_RWLOCK_EXTERN(ip_nat_lock); +extern rwlock_t ip_nat_lock; /* The structure embedded in the conntrack structure. */ struct ip_nat_info diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h index f2ae7c5e57bb..360429f48737 100644 --- a/include/linux/netfilter_ipv4/listhelp.h +++ b/include/linux/netfilter_ipv4/listhelp.h @@ -2,7 +2,6 @@ #define _LISTHELP_H #include #include -#include /* Header to do more comprehensive job than linux/list.h; assume list is first entry in structure. */ diff --git a/include/linux/netfilter_ipv4/lockhelp.h b/include/linux/netfilter_ipv4/lockhelp.h deleted file mode 100644 index a3288633ab46..000000000000 --- a/include/linux/netfilter_ipv4/lockhelp.h +++ /dev/null @@ -1,129 +0,0 @@ -#ifndef _LOCKHELP_H -#define _LOCKHELP_H -#include - -#include -#include -#include -#include - -/* Header to do help in lock debugging. */ - -#ifdef CONFIG_NETFILTER_DEBUG -struct spinlock_debug -{ - spinlock_t l; - atomic_t locked_by; -}; - -struct rwlock_debug -{ - rwlock_t l; - long read_locked_map; - long write_locked_map; -}; - -#define DECLARE_LOCK(l) \ -struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) } -#define DECLARE_LOCK_EXTERN(l) \ -extern struct spinlock_debug l -#define DECLARE_RWLOCK(l) \ -struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 } -#define DECLARE_RWLOCK_EXTERN(l) \ -extern struct rwlock_debug l - -#define MUST_BE_LOCKED(l) \ -do { if (atomic_read(&(l)->locked_by) != smp_processor_id()) \ - printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l); \ -} while(0) - -#define MUST_BE_UNLOCKED(l) \ -do { if (atomic_read(&(l)->locked_by) == smp_processor_id()) \ - printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l); \ -} while(0) - -/* Write locked OK as well. */ -#define MUST_BE_READ_LOCKED(l) \ -do { if (!((l)->read_locked_map & (1UL << smp_processor_id())) \ - && !((l)->write_locked_map & (1UL << smp_processor_id()))) \ - printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \ -} while(0) - -#define MUST_BE_WRITE_LOCKED(l) \ -do { if (!((l)->write_locked_map & (1UL << smp_processor_id()))) \ - printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \ -} while(0) - -#define MUST_BE_READ_WRITE_UNLOCKED(l) \ -do { if ((l)->read_locked_map & (1UL << smp_processor_id())) \ - printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l); \ - else if ((l)->write_locked_map & (1UL << smp_processor_id())) \ - printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \ -} while(0) - -#define LOCK_BH(lk) \ -do { \ - MUST_BE_UNLOCKED(lk); \ - spin_lock_bh(&(lk)->l); \ - atomic_set(&(lk)->locked_by, smp_processor_id()); \ -} while(0) - -#define UNLOCK_BH(lk) \ -do { \ - MUST_BE_LOCKED(lk); \ - atomic_set(&(lk)->locked_by, -1); \ - spin_unlock_bh(&(lk)->l); \ -} while(0) - -#define READ_LOCK(lk) \ -do { \ - MUST_BE_READ_WRITE_UNLOCKED(lk); \ - read_lock_bh(&(lk)->l); \ - set_bit(smp_processor_id(), &(lk)->read_locked_map); \ -} while(0) - -#define WRITE_LOCK(lk) \ -do { \ - MUST_BE_READ_WRITE_UNLOCKED(lk); \ - write_lock_bh(&(lk)->l); \ - set_bit(smp_processor_id(), &(lk)->write_locked_map); \ -} while(0) - -#define READ_UNLOCK(lk) \ -do { \ - if (!((lk)->read_locked_map & (1UL << smp_processor_id()))) \ - printk("ASSERT: %s:%u %s not readlocked\n", \ - __FILE__, __LINE__, #lk); \ - clear_bit(smp_processor_id(), &(lk)->read_locked_map); \ - read_unlock_bh(&(lk)->l); \ -} while(0) - -#define WRITE_UNLOCK(lk) \ -do { \ - MUST_BE_WRITE_LOCKED(lk); \ - clear_bit(smp_processor_id(), &(lk)->write_locked_map); \ - write_unlock_bh(&(lk)->l); \ -} while(0) - -#else -#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED -#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l -#define DECLARE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED -#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l - -#define MUST_BE_LOCKED(l) -#define MUST_BE_UNLOCKED(l) -#define MUST_BE_READ_LOCKED(l) -#define MUST_BE_WRITE_LOCKED(l) -#define MUST_BE_READ_WRITE_UNLOCKED(l) - -#define LOCK_BH(l) spin_lock_bh(l) -#define UNLOCK_BH(l) spin_unlock_bh(l) - -#define READ_LOCK(l) read_lock_bh(l) -#define WRITE_LOCK(l) write_lock_bh(l) -#define READ_UNLOCK(l) read_unlock_bh(l) -#define WRITE_UNLOCK(l) write_unlock_bh(l) -#endif /*CONFIG_NETFILTER_DEBUG*/ - -#endif /* _LOCKHELP_H */ -- cgit v1.2.1 From 18b8afc771102b1b6af97962808291a7d27f52af Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 21 Jun 2005 14:01:57 -0700 Subject: [NETFILTER]: Kill nf_debug Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4.h | 6 ------ include/linux/skbuff.h | 13 ------------- 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 9e5750079e09..3ebc36afae1a 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -75,12 +75,6 @@ enum nf_ip_hook_priorities { #define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -#ifdef CONFIG_NETFILTER_DEBUG -void nf_debug_ip_local_deliver(struct sk_buff *skb); -void nf_debug_ip_loopback_xmit(struct sk_buff *newskb); -void nf_debug_ip_finish_output2(struct sk_buff *skb); -#endif /*CONFIG_NETFILTER_DEBUG*/ - extern int ip_route_me_harder(struct sk_buff **pskb); /* Call this before modifying an existing IP packet: ensures it is diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc04f5cd2286..d7c839a21842 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -193,7 +193,6 @@ struct skb_shared_info { * @nfcache: Cache info * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection - * @nf_debug: Netfilter debugging * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index @@ -264,9 +263,6 @@ struct sk_buff { __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; -#ifdef CONFIG_NETFILTER_DEBUG - unsigned int nf_debug; -#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif @@ -1219,15 +1215,6 @@ static inline void nf_reset(struct sk_buff *skb) { nf_conntrack_put(skb->nfct); skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -} -static inline void nf_reset_debug(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif } #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.1 From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Jun 2005 17:14:34 -0700 Subject: [PATCH] smp_processor_id() cleanup This patch implements a number of smp_processor_id() cleanup ideas that Arjan van de Ven and I came up with. The previous __smp_processor_id/_smp_processor_id/smp_processor_id API spaghetti was hard to follow both on the implementational and on the usage side. Some of the complexity arose from picking wrong names, some of the complexity comes from the fact that not all architectures defined __smp_processor_id. In the new code, there are two externally visible symbols: - smp_processor_id(): debug variant. - raw_smp_processor_id(): nondebug variant. Replaces all existing uses of _smp_processor_id() and __smp_processor_id(). Defined by every SMP architecture in include/asm-*/smp.h. There is one new internal symbol, dependent on DEBUG_PREEMPT: - debug_smp_processor_id(): internal debug variant, mapped to smp_processor_id(). Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new lib/smp_processor_id.c file. All related comments got updated and/or clarified. I have build/boot tested the following 8 .config combinations on x86: {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT} I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT. (Other architectures are untested, but should work just fine.) Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- include/linux/smp.h | 40 ++++++++++++++++------------------------ 2 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e530c6c092f1..beacd931b606 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -381,7 +381,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, #include /* Returns the number of the current Node. */ -#define numa_node_id() (cpu_to_node(_smp_processor_id())) +#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) #ifndef CONFIG_DISCONTIGMEM diff --git a/include/linux/smp.h b/include/linux/smp.h index dcf1db3b35d3..9dfa3ee769ae 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -92,10 +92,7 @@ void smp_prepare_boot_cpu(void); /* * These macros fold the SMP functionality into a single CPU system */ - -#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT) -# define smp_processor_id() 0 -#endif +#define raw_smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) @@ -106,30 +103,25 @@ static inline void smp_send_reschedule(int cpu) { } #endif /* !SMP */ /* - * DEBUG_PREEMPT support: check whether smp_processor_id() is being - * used in a preemption-safe way. + * smp_processor_id(): get the current CPU ID. * - * An architecture has to enable this debugging code explicitly. - * It can do so by renaming the smp_processor_id() macro to - * __smp_processor_id(). This should only be done after some minimal - * testing, because usually there are a number of false positives - * that an architecture will trigger. + * if DEBUG_PREEMPT is enabled the we check whether it is + * used in a preemption-safe way. (smp_processor_id() is safe + * if it's used in a preemption-off critical section, or in + * a thread that is bound to the current CPU.) * - * To fix a false positive (i.e. smp_processor_id() use that the - * debugging code reports but which use for some reason is legal), - * change the smp_processor_id() reference to _smp_processor_id(), - * which is the nondebug variant. NOTE: don't use this to hack around - * real bugs. + * NOTE: raw_smp_processor_id() is for internal use only + * (smp_processor_id() is the preferred variant), but in rare + * instances it might also be used to turn off false positives + * (i.e. smp_processor_id() use that the debugging code reports but + * which use for some reason is legal). Don't use this to hack around + * the warning message, as your code might not work under PREEMPT. */ -#ifdef __smp_processor_id -# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) - extern unsigned int smp_processor_id(void); -# else -# define smp_processor_id() __smp_processor_id() -# endif -# define _smp_processor_id() __smp_processor_id() +#ifdef CONFIG_DEBUG_PREEMPT + extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() #else -# define _smp_processor_id() smp_processor_id() +# define smp_processor_id() raw_smp_processor_id() #endif #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) -- cgit v1.2.1 From 753ee728964e5afb80c17659cc6c3a6fd0a42fe0 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 21 Jun 2005 17:14:41 -0700 Subject: [PATCH] VM: early zone reclaim This is the core of the (much simplified) early reclaim. The goal of this patch is to reclaim some easily-freed pages from a zone before falling back onto another zone. One of the major uses of this is NUMA machines. With the default allocator behavior the allocator would look for memory in another zone, which might be off-node, before trying to reclaim from the current zone. This adds a zone tuneable to enable early zone reclaim. It is selected on a per-zone basis and is turned on/off via syscall. Adding some extra throttling on the reclaim was also required (patch 4/4). Without the machine would grind to a crawl when doing a "make -j" kernel build. Even with this patch the System Time is higher on average, but it seems tolerable. Here are some numbers for kernbench runs on a 2-node, 4cpu, 8Gig RAM Altix in the "make -j" run: wall user sys %cpu ctx sw. sleeps ---- ---- --- ---- ------ ------ No patch 1009 1384 847 258 298170 504402 w/patch, no reclaim 880 1376 667 288 254064 396745 w/patch & reclaim 1079 1385 926 252 291625 548873 These numbers are the average of 2 runs of 3 "make -j" runs done right after system boot. Run-to-run variability for "make -j" is huge, so these numbers aren't terribly useful except to seee that with reclaim the benchmark still finishes in a reasonable amount of time. I also looked at the NUMA hit/miss stats for the "make -j" runs and the reclaim doesn't make any difference when the machine is thrashing away. Doing a "make -j8" on a single node that is filled with page cache pages takes 700 seconds with reclaim turned on and 735 seconds without reclaim (due to remote memory accesses). The simple zone_reclaim syscall program is at http://www.bork.org/~mort/sgi/zone_reclaim.c Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++++ include/linux/swap.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index beacd931b606..dfc2452ccb10 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -144,6 +144,12 @@ struct zone { unsigned long pages_scanned; /* since last reclaim */ int all_unreclaimable; /* All pages pinned */ + /* + * Does the allocator try to reclaim pages from the zone as soon + * as it fails a watermark_ok() in __alloc_pages? + */ + int reclaim_pages; + /* * prev_priority holds the scanning priority for this zone. It is * defined as the scanning priority at which we achieved our reclaim diff --git a/include/linux/swap.h b/include/linux/swap.h index 3bbc41be9bd0..0d21e682d99d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -173,6 +173,7 @@ extern void swap_setup(void); /* linux/mm/vmscan.c */ extern int try_to_free_pages(struct zone **, unsigned int, unsigned int); +extern int zone_reclaim(struct zone *, unsigned int, unsigned int); extern int shrink_all_memory(int); extern int vm_swappiness; -- cgit v1.2.1 From 0c35bbadc59f5ed105c34471143eceb4c0dd9c95 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 21 Jun 2005 17:14:42 -0700 Subject: [PATCH] VM: add __GFP_NORECLAIM When using the early zone reclaim, it was noticed that allocating new pages that should be spread across the whole system caused eviction of local pages. This adds a new GFP flag to prevent early reclaim from happening during certain allocation attempts. The example that is implemented here is for page cache pages. We want page cache pages to be spread across the whole system, and we don't want page cache pages to evict other pages to get local memory. Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 ++- include/linux/pagemap.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index af7407e8cfc5..208535fd4832 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,6 +39,7 @@ struct vm_area_struct; #define __GFP_COMP 0x4000u /* Add compound page metadata */ #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ +#define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) @@ -47,7 +48,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC) + __GFP_NOMEMALLOC|__GFP_NORECLAIM) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0422031161ba..d9a25647a295 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -52,12 +52,12 @@ void release_pages(struct page **pages, int nr, int cold); static inline struct page *page_cache_alloc(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x), 0); + return alloc_pages(mapping_gfp_mask(x)|__GFP_NORECLAIM, 0); } static inline struct page *page_cache_alloc_cold(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0); + return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD|__GFP_NORECLAIM, 0); } typedef int filler_t(void *, struct page *); -- cgit v1.2.1 From 1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 21 Jun 2005 17:14:43 -0700 Subject: [PATCH] VM: rate limit early reclaim When early zone reclaim is turned on the LRU is scanned more frequently when a zone is low on memory. This limits when the zone reclaim can be called by skipping the scan if another thread (either via kswapd or sync reclaim) is already reclaiming from the zone. Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dfc2452ccb10..18fed8b67943 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -149,6 +149,8 @@ struct zone { * as it fails a watermark_ok() in __alloc_pages? */ int reclaim_pages; + /* A count of how many reclaimers are scanning this zone */ + atomic_t reclaim_in_progress; /* * prev_priority holds the scanning priority for this zone. It is -- cgit v1.2.1 From 63551ae0feaaa23807ebea60de1901564bbef32e Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 21 Jun 2005 17:14:44 -0700 Subject: [PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to covert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analagous to set_pte() - does SH4 need s special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6af1ae4a8211..f529d1442815 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -4,6 +4,7 @@ #ifdef CONFIG_HUGETLB_PAGE #include +#include struct ctl_table; @@ -22,12 +23,6 @@ int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write); -int is_aligned_hugepage_range(unsigned long addr, unsigned long len); -int pmd_huge(pmd_t pmd); struct page *alloc_huge_page(void); void free_huge_page(struct page *); @@ -35,6 +30,17 @@ extern unsigned long max_huge_pages; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; +/* arch callbacks */ + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write); +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write); +int is_aligned_hugepage_range(unsigned long addr, unsigned long len); +int pmd_huge(pmd_t pmd); + #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ @@ -48,6 +54,28 @@ extern int sysctl_hugetlb_shm_group; int prepare_hugepage_range(unsigned long addr, unsigned long len); #endif +#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE +#define set_huge_pte_at(mm, addr, ptep, pte) set_pte_at(mm, addr, ptep, pte) +#define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep) +#else +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); +#endif + +#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK +#define hugetlb_prefault_arch_hook(mm) do { } while (0) +#else +void hugetlb_prefault_arch_hook(struct mm_struct *mm); +#endif + +#ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE +#define hugetlb_clean_stale_pgtable(pte) BUG() +#else +void hugetlb_clean_stale_pgtable(pte_t *pte); +#endif + #else /* !CONFIG_HUGETLB_PAGE */ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -- cgit v1.2.1 From e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 21 Jun 2005 17:14:47 -0700 Subject: [PATCH] node local per-cpu-pages This patch modifies the way pagesets in struct zone are managed. Each zone has a per-cpu array of pagesets. So any particular CPU has some memory in each zone structure which belongs to itself. Even if that CPU is not local to that zone. So the patch relocates the pagesets for each cpu to the node that is nearest to the cpu instead of allocating the pagesets in the (possibly remote) target zone. This means that the operations to manage pages on remote zone can be done with information available locally. We play a macro trick so that non-NUMA pmachines avoid the additional pointer chase on the page allocator fastpath. AIM7 benchmark on a 32 CPU SGI Altix w/o patches: Tasks jobs/min jti jobs/min/task real cpu 1 484.68 100 484.6769 12.01 1.97 Fri Mar 25 11:01:42 2005 100 27140.46 89 271.4046 21.44 148.71 Fri Mar 25 11:02:04 2005 200 30792.02 82 153.9601 37.80 296.72 Fri Mar 25 11:02:42 2005 300 32209.27 81 107.3642 54.21 451.34 Fri Mar 25 11:03:37 2005 400 34962.83 78 87.4071 66.59 588.97 Fri Mar 25 11:04:44 2005 500 31676.92 75 63.3538 91.87 742.71 Fri Mar 25 11:06:16 2005 600 36032.69 73 60.0545 96.91 885.44 Fri Mar 25 11:07:54 2005 700 35540.43 77 50.7720 114.63 1024.28 Fri Mar 25 11:09:49 2005 800 33906.70 74 42.3834 137.32 1181.65 Fri Mar 25 11:12:06 2005 900 34120.67 73 37.9119 153.51 1325.26 Fri Mar 25 11:14:41 2005 1000 34802.37 74 34.8024 167.23 1465.26 Fri Mar 25 11:17:28 2005 with slab API changes and pageset patch: Tasks jobs/min jti jobs/min/task real cpu 1 485.00 100 485.0000 12.00 1.96 Fri Mar 25 11:46:18 2005 100 28000.96 89 280.0096 20.79 150.45 Fri Mar 25 11:46:39 2005 200 32285.80 79 161.4290 36.05 293.37 Fri Mar 25 11:47:16 2005 300 40424.15 84 134.7472 43.19 438.42 Fri Mar 25 11:47:59 2005 400 39155.01 79 97.8875 59.46 590.05 Fri Mar 25 11:48:59 2005 500 37881.25 82 75.7625 76.82 730.19 Fri Mar 25 11:50:16 2005 600 39083.14 78 65.1386 89.35 872.79 Fri Mar 25 11:51:46 2005 700 38627.83 77 55.1826 105.47 1022.46 Fri Mar 25 11:53:32 2005 800 39631.94 78 49.5399 117.48 1169.94 Fri Mar 25 11:55:30 2005 900 36903.70 79 41.0041 141.94 1310.78 Fri Mar 25 11:57:53 2005 1000 36201.23 77 36.2012 160.77 1458.31 Fri Mar 25 12:00:34 2005 Signed-off-by: Christoph Lameter Signed-off-by: Shobhit Dayal Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ include/linux/mmzone.h | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 17518fe0b311..1813b162b0a8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -691,6 +691,12 @@ extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +#ifdef CONFIG_NUMA +extern void setup_per_cpu_pageset(void); +#else +static inline void setup_per_cpu_pageset(void) {} +#endif + /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 18fed8b67943..4733d35d8223 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -63,6 +63,12 @@ struct per_cpu_pageset { #endif } ____cacheline_aligned_in_smp; +#ifdef CONFIG_NUMA +#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) +#else +#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) +#endif + #define ZONE_DMA 0 #define ZONE_NORMAL 1 #define ZONE_HIGHMEM 2 @@ -122,8 +128,11 @@ struct zone { */ unsigned long lowmem_reserve[MAX_NR_ZONES]; +#ifdef CONFIG_NUMA + struct per_cpu_pageset *pageset[NR_CPUS]; +#else struct per_cpu_pageset pageset[NR_CPUS]; - +#endif /* * free areas of different sizes */ -- cgit v1.2.1 From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001 From: Wolfgang Wander Date: Tue, 21 Jun 2005 17:14:49 -0700 Subject: [PATCH] Avoiding mmap fragmentation Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and causes huge performance increases in thread creation. The downside of this patch is that it does lead to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel. The problem is twofold: 1) the free_area_cache is used to continue a search for memory where the last search ended. Before the change new areas were always searched from the base address on. So now new small areas are cluttering holes of all sizes throughout the whole mmap-able region whereas before small holes tended to close holes near the base leaving holes far from the base large and available for larger requests. 2) the free_area_cache also is set to the location of the last munmap-ed area so in scenarios where we allocate e.g. five regions of 1K each, then free regions 4 2 3 in this order the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had 1 free region of 2K, now we only get two free regions of 1K -> fragmentation. The patch addresses thes issues by introducing yet another cache descriptor cached_hole_size that contains the largest known hole size below the current free_area_cache. If a new request comes in the size is compared against the cached_hole_size and if the request can be filled with a hole below free_area_cache the search is started from the base instead. The results look promising: Whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps it performs nicely (as expected) with thread creation, Ingo's test_str02 with 20000 threads requires 0.7s system time. Taking out Ingo's patch (un-patch available per request) by basically deleting all mentions of free_area_cache from the kernel and starting the search for new memory always at the respective bases we observe: leakme terminates successfully with 11 distinctive hardly fragmented areas in /proc/self/maps but thread creating is gringdingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads. Now - drumroll ;-) the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps and also thread creation seems sufficiently fast with 0.71s for 20000 threads. Signed-off-by: Wolfgang Wander Credit-to: "Richard Purdie" Signed-off-by: Ken Chen Acked-by: Ingo Molnar (partly) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4dbb109022f3..b58afd97a180 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -201,8 +201,8 @@ extern unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -extern void arch_unmap_area(struct vm_area_struct *area); -extern void arch_unmap_area_topdown(struct vm_area_struct *area); +extern void arch_unmap_area(struct mm_struct *, unsigned long); +extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #define set_mm_counter(mm, member, value) (mm)->_##member = (value) #define get_mm_counter(mm, member) ((mm)->_##member) @@ -218,9 +218,10 @@ struct mm_struct { unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); - void (*unmap_area) (struct vm_area_struct *area); - unsigned long mmap_base; /* base of mmap area */ - unsigned long free_area_cache; /* first hole */ + void (*unmap_area) (struct mm_struct *mm, unsigned long addr); + unsigned long mmap_base; /* base of mmap area */ + unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ + unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ -- cgit v1.2.1 From cbe37d093707762fc0abb280781e6a82a9d8d568 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Tue, 21 Jun 2005 17:14:52 -0700 Subject: [PATCH] mm: remove PG_highmem Remove PG_highmem, to save a page flag. Use is_highmem() instead. It'll generate a little more code, but we don't use PageHigheMem() in many places. Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 39ab8c6b5652..df313891db10 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -61,21 +61,20 @@ #define PG_active 6 #define PG_slab 7 /* slab debug (Suparna wants this) */ -#define PG_highmem 8 -#define PG_checked 9 /* kill me in 2.5.. */ -#define PG_arch_1 10 -#define PG_reserved 11 - -#define PG_private 12 /* Has something at ->private */ -#define PG_writeback 13 /* Page is under writeback */ -#define PG_nosave 14 /* Used for system suspend/resume */ -#define PG_compound 15 /* Part of a compound page */ - -#define PG_swapcache 16 /* Swap page: swp_entry_t in private */ -#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ -#define PG_reclaim 18 /* To be reclaimed asap */ -#define PG_nosave_free 19 /* Free, should not be written */ -#define PG_uncached 20 /* Page has been mapped as uncached */ +#define PG_checked 8 /* kill me in 2.5.. */ +#define PG_arch_1 9 +#define PG_reserved 10 +#define PG_private 11 /* Has something at ->private */ + +#define PG_writeback 12 /* Page is under writeback */ +#define PG_nosave 13 /* Used for system suspend/resume */ +#define PG_compound 14 /* Part of a compound page */ +#define PG_swapcache 15 /* Swap page: swp_entry_t in private */ + +#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ +#define PG_reclaim 17 /* To be reclaimed asap */ +#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_uncached 19 /* Page has been mapped as uncached */ /* * Global page accounting. One instance per CPU. Only unsigned longs are @@ -215,7 +214,7 @@ extern void __mod_page_state(unsigned offset, unsigned long delta); #define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags) #ifdef CONFIG_HIGHMEM -#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) +#define PageHighMem(page) is_highmem(page_zone(page)) #else #define PageHighMem(page) 0 /* needed to optimize away at compile time */ #endif -- cgit v1.2.1 From 1ad539b2bd89bf2e129123eb24d5bcc4484a35de Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Tue, 21 Jun 2005 17:14:53 -0700 Subject: [PATCH] vm: try_to_free_pages unused argument try_to_free_pages accepts a third argument, order, but hasn't used it since before 2.6.0. The following patch removes the argument and updates all the calls to try_to_free_pages. Signed-off-by: Darren Hart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0d21e682d99d..2343f999e6e1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -172,7 +172,7 @@ extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern int try_to_free_pages(struct zone **, unsigned int, unsigned int); +extern int try_to_free_pages(struct zone **, unsigned int); extern int zone_reclaim(struct zone *, unsigned int, unsigned int); extern int shrink_all_memory(int); extern int vm_swappiness; -- cgit v1.2.1 From 83e5d8f7253cb7b14472385a6d57df1e9f848e8e Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 21 Jun 2005 17:14:54 -0700 Subject: [PATCH] __mod_page_state(): pass unsigned long instead of unsigned By making the offset argument of __mod_page_state an unsigned long instead of unsigned, we can avoid forcing the compiler to sign extend a usually constant argument. This saves 1 instruction on x86-64. Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index df313891db10..f2ee9b2332e3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,7 +136,7 @@ struct page_state { extern void get_page_state(struct page_state *ret); extern void get_full_page_state(struct page_state *ret); extern unsigned long __read_page_state(unsigned offset); -extern void __mod_page_state(unsigned offset, unsigned long delta); +extern void __mod_page_state(unsigned long offset, unsigned long delta); #define read_page_state(member) \ __read_page_state(offsetof(struct page_state, member)) -- cgit v1.2.1 From c2f29ea111e3344ed48257c2a142c3db514e1529 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 21 Jun 2005 17:14:55 -0700 Subject: [PATCH] __read_page_state(): pass unsigned long instead of unsigned By making the offset argument of __read_page_state an unsigned long instead of unsigned, we can avoid forcing the compiler to sign extend a usually constant argument. This saves 1 instruction on x86-64. Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f2ee9b2332e3..f5a6695d4d21 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -135,7 +135,7 @@ struct page_state { extern void get_page_state(struct page_state *ret); extern void get_full_page_state(struct page_state *ret); -extern unsigned long __read_page_state(unsigned offset); +extern unsigned long __read_page_state(unsigned long offset); extern void __mod_page_state(unsigned long offset, unsigned long delta); #define read_page_state(member) \ -- cgit v1.2.1 From 4ae7c03943fca73f23bc0cdb938070f41b98101f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 21 Jun 2005 17:14:57 -0700 Subject: [PATCH] Periodically drain non local pagesets The pageset array can potentially acquire a huge amount of memory on large NUMA systems. F.e. on a system with 512 processors and 256 nodes there will be 256*512 pagesets. If each pageset only holds 5 pages then we are talking about 655360 pages.With a 16K page size on IA64 this results in potentially 10 Gigabytes of memory being trapped in pagesets. The typical cases are much less for smaller systems but there is still the potential of memory being trapped in off node pagesets. Off node memory may be rarely used if local memory is available and so we may potentially have memory in seldom used pagesets without this patch. The slab allocator flushes its per cpu caches every 2 seconds. The following patch flushes the off node pageset caches in the same way by tying into the slab flush. The patch also changes /proc/zoneinfo to include the number of pages currently in each pageset. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 208535fd4832..8d6bf608b199 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page)); #define free_page(addr) free_pages((addr),0) void page_alloc_init(void); +#ifdef CONFIG_NUMA +void drain_remote_pages(void); +#else +static inline void drain_remote_pages(void) { }; +#endif #endif /* __LINUX_GFP_H */ -- cgit v1.2.1 From f14f75b81187cdbe10cc53a521bf9fdf97b59f8c Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 21 Jun 2005 17:15:02 -0700 Subject: [PATCH] ia64 uncached alloc This patch contains the ia64 uncached page allocator and the generic allocator (genalloc). The uncached allocator was formerly part of the SN2 mspec driver but there are several other users of it so it has been split off from the driver. The generic allocator can be used by device driver to manage special memory etc. The generic allocator is based on the allocator from the sym53c8xx_2 driver. Various users on ia64 needs uncached memory. The SGI SN architecture requires it for inter-partition communication between partitions within a large NUMA cluster. The specific user for this is the XPC code. Another application is large MPI style applications which use it for synchronization, on SN this can be done using special 'fetchop' operations but it also benefits non SN hardware which may use regular uncached memory for this purpose. Performance of doing this through uncached vs cached memory is pretty substantial. This is handled by the mspec driver which I will push out in a seperate patch. Rather than creating a specific allocator for just uncached memory I came up with genalloc which is a generic purpose allocator that can be used by device drivers and other subsystems as they please. For instance to handle onboard device memory. It was derived from the sym53c7xx_2 driver's allocator which is also an example of a potential user (I am refraining from modifying sym2 right now as it seems to have been under fairly heavy development recently). On ia64 memory has various properties within a granule, ie. it isn't safe to access memory as uncached within the same granule as currently has memory accessed in cached mode. The regular system therefore doesn't utilize memory in the lower granules which is mixed in with device PAL code etc. The uncached driver walks the EFI memmap and pulls out the spill uncached pages and sticks them into the uncached pool. Only after these chunks have been utilized, will it start converting regular cached memory into uncached memory. Hence the reason for the EFI related code additions. Signed-off-by: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/genalloc.h (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h new file mode 100644 index 000000000000..7fd0576a4454 --- /dev/null +++ b/include/linux/genalloc.h @@ -0,0 +1,40 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This code is based on the buddy allocator found in the sym53c8xx_2 + * driver, adapted for general purpose use. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include + +#define ALLOC_MIN_SHIFT 5 /* 32 bytes minimum */ +/* + * Link between free memory chunks of a given size. + */ +struct gen_pool_link { + struct gen_pool_link *next; +}; + +/* + * Memory pool descriptor. + */ +struct gen_pool { + spinlock_t lock; + unsigned long (*get_new_chunk)(struct gen_pool *); + struct gen_pool *next; + struct gen_pool_link *h; + unsigned long private; + int max_chunk_shift; +}; + +unsigned long gen_pool_alloc(struct gen_pool *poolp, int size); +void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size); +struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift, + unsigned long (*fp)(struct gen_pool *), + unsigned long data); -- cgit v1.2.1 From 5b37b700f7c491a9320f4e29472bbaf23dded8fd Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 21 Jun 2005 17:15:18 -0700 Subject: [PATCH] ppc32: Added support for new MPC8548 family of PowerQUICC III processors Added descriptions of the new MPC8548 family processors, e500 core and peripherals. Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsl_devices.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index faaff4c64559..70f54af87b9f 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -51,6 +51,7 @@ struct gianfar_platform_data { /* board specific information */ u32 board_flags; + u32 phy_flags; u32 phyid; u32 interruptPHY; u8 mac_addr[6]; @@ -61,9 +62,14 @@ struct gianfar_platform_data { #define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 #define FSL_GIANFAR_DEV_HAS_RMON 0x00000004 #define FSL_GIANFAR_DEV_HAS_MULTI_INTR 0x00000008 +#define FSL_GIANFAR_DEV_HAS_CSUM 0x00000010 +#define FSL_GIANFAR_DEV_HAS_VLAN 0x00000020 +#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH 0x00000040 +#define FSL_GIANFAR_DEV_HAS_PADDING 0x00000080 /* Flags in gianfar_platform_data */ -#define FSL_GIANFAR_BRD_HAS_PHY_INTR 0x00000001 /* if not set use a timer */ +#define FSL_GIANFAR_BRD_HAS_PHY_INTR 0x00000001 /* set or use a timer */ +#define FSL_GIANFAR_BRD_IS_REDUCED 0x00000002 /* Set if RGMII, RMII */ struct fsl_i2c_platform_data { /* device specific information */ -- cgit v1.2.1 From 22329b511a97557b293583194037d1f4c71e1504 Mon Sep 17 00:00:00 2001 From: Brent Casavant Date: Tue, 21 Jun 2005 17:15:59 -0700 Subject: [PATCH] ioc4: Core driver rewrite This series of patches reworks the configuration and internal structure of the SGI IOC4 I/O controller device drivers. These changes are motivated by several factors: - The IOC4 chip PCI resources are of mixed use between functions (i.e. multiple functions are handled in the same address range, sometimes within the same register), muddling resource ownership and initialization issues. Centralizing this ownership in a core driver is desirable. - The IOC4 chip implements multiple functions (serial, IDE, others not yet implemented in the mainline kernel) but is not a multifunction PCI device. In order to properly handle device addition and removal as well as module insertion and deletion, an intermediary IOC4-specific driver layer is needed to handle these operations cleanly. - All IOC4 drivers are currently enabled by a single CONFIG value. As not all systems need all IOC4 functions, it is desireable to enable these drivers independently. - The current IOC4 core driver will trigger loading of all function-level drivers, as it makes direct calls to them. This situation should be reversed (i.e. function-level drivers cause loading of core driver) in order to maintain a clear and least-surprise driver loading model. - IOC4 hardware design necessitates some driver-level dependency on the PCI bus clock speed. Current code assumes a 66MHz bus, but the speed should be autodetected and appropriate compensation taken. This patch series effects the above changes by a newly and better designed IOC4 core driver with which the function-level drivers can register and deregister themselves upon module insertion/removal. By tracking these modules, device addition/removal is also handled properly. PCI resource management and ownership issues are centralized in this core driver, and IOC4-wide configuration actions such as bus speed detection are also handled in this core driver. This patch: The SGI IOC4 I/O controller chip implements multiple functions, though it is not a multi-function PCI device. Additionally, various PCI resources of the IOC4 are shared by multiple hardware functions, and thus resource ownership by driver is not clearly delineated. Due to the current driver design, all core and subordinate drivers must be loaded, or none, which is undesirable if not all IOC4 hardware features are being used. This patch reorganizes the IOC4 drivers so that the core driver provides a subdriver registration service. Through appropriate callbacks the subdrivers can now handle device addition and removal, as well as module insertion and deletion (though the IOC4 IDE driver requires further work before module deletion will work). The core driver now takes care of allocating PCI resources and data which must be shared between subdrivers, to clearly delineate module ownership of these items. Signed-off-by: Brent Casavant Acked-by: Pat Gefre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioc4.h | 156 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/ioc4_common.h | 21 ------ 2 files changed, 156 insertions(+), 21 deletions(-) create mode 100644 include/linux/ioc4.h delete mode 100644 include/linux/ioc4_common.h (limited to 'include/linux') diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h new file mode 100644 index 000000000000..729bfa4c4ac5 --- /dev/null +++ b/include/linux/ioc4.h @@ -0,0 +1,156 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#ifndef _LINUX_IOC4_H +#define _LINUX_IOC4_H + +#include + +/*********************************** + * Structures needed by subdrivers * + ***********************************/ + +/* This structure fully describes the IOC4 miscellaneous registers which + * appear at bar[0]+0x00000 through bar[0]+0x0005c. The corresponding + * PCI resource is managed by the main IOC4 driver because it contains + * registers of interest to many different IOC4 subdrivers. + */ +struct ioc4_misc_regs { + /* Miscellaneous IOC4 registers */ + union ioc4_pci_err_addr_l { + uint32_t raw; + struct { + uint32_t valid:1; /* Address captured */ + uint32_t master_id:4; /* Unit causing error + * 0/1: Serial port 0 TX/RX + * 2/3: Serial port 1 TX/RX + * 4/5: Serial port 2 TX/RX + * 6/7: Serial port 3 TX/RX + * 8: ATA/ATAPI + * 9-15: Undefined + */ + uint32_t mul_err:1; /* Multiple errors occurred */ + uint32_t addr:26; /* Bits 31-6 of error addr */ + } fields; + } pci_err_addr_l; + uint32_t pci_err_addr_h; /* Bits 63-32 of error addr */ + union ioc4_sio_int { + uint32_t raw; + struct { + uint8_t tx_mt:1; /* TX ring buffer empty */ + uint8_t rx_full:1; /* RX ring buffer full */ + uint8_t rx_high:1; /* RX high-water exceeded */ + uint8_t rx_timer:1; /* RX timer has triggered */ + uint8_t delta_dcd:1; /* DELTA_DCD seen */ + uint8_t delta_cts:1; /* DELTA_CTS seen */ + uint8_t intr_pass:1; /* Interrupt pass-through */ + uint8_t tx_explicit:1; /* TX, MCW, or delay complete */ + } fields[4]; + } sio_ir; /* Serial interrupt state */ + union ioc4_other_int { + uint32_t raw; + struct { + uint32_t ata_int:1; /* ATA port passthru */ + uint32_t ata_memerr:1; /* ATA halted by mem error */ + uint32_t memerr:4; /* Serial halted by mem err */ + uint32_t kbd_int:1; /* kbd/mouse intr asserted */ + uint32_t reserved:16; /* zero */ + uint32_t rt_int:1; /* INT_OUT section latch */ + uint32_t gen_int:8; /* Intr. from generic pins */ + } fields; + } other_ir; /* Other interrupt state */ + union ioc4_sio_int sio_ies; /* Serial interrupt enable set */ + union ioc4_other_int other_ies; /* Other interrupt enable set */ + union ioc4_sio_int sio_iec; /* Serial interrupt enable clear */ + union ioc4_other_int other_iec; /* Other interrupt enable clear */ + union ioc4_sio_cr { + uint32_t raw; + struct { + uint32_t cmd_pulse:4; /* Bytebus strobe width */ + uint32_t arb_diag:3; /* PCI bus requester */ + uint32_t sio_diag_idle:1; /* Active ser req? */ + uint32_t ata_diag_idle:1; /* Active ATA req? */ + uint32_t ata_diag_active:1; /* ATA req is winner */ + uint32_t reserved:22; /* zero */ + } fields; + } sio_cr; + uint32_t unused1; + union ioc4_int_out { + uint32_t raw; + struct { + uint32_t count:16; /* Period control */ + uint32_t mode:3; /* Output signal shape */ + uint32_t reserved:11; /* zero */ + uint32_t diag:1; /* Timebase control */ + uint32_t int_out:1; /* Current value */ + } fields; + } int_out; /* External interrupt output control */ + uint32_t unused2; + union ioc4_gpcr { + uint32_t raw; + struct { + uint32_t dir:8; /* Pin direction */ + uint32_t edge:8; /* Edge/level mode */ + uint32_t reserved1:4; /* zero */ + uint32_t int_out_en:1; /* INT_OUT enable */ + uint32_t reserved2:11; /* zero */ + } fields; + } gpcr_s; /* Generic PIO control set */ + union ioc4_gpcr gpcr_c; /* Generic PIO control clear */ + union ioc4_gpdr { + uint32_t raw; + struct { + uint32_t gen_pin:8; /* State of pins */ + uint32_t reserved:24; + } fields; + } gpdr; /* Generic PIO data */ + uint32_t unused3; + union ioc4_gppr { + uint32_t raw; + struct { + uint32_t gen_pin:1; /* Single pin state */ + uint32_t reserved:31; + } fields; + } gppr[8]; /* Generic PIO pins */ +}; + +/* One of these per IOC4 + * + * The idd_serial_data field is present here, even though it's used + * solely by the serial subdriver, because the main IOC4 module + * properly owns pci_{get,set}_drvdata functionality. This field + * allows that subdriver to stash its own drvdata somewhere. + */ +struct ioc4_driver_data { + struct list_head idd_list; + unsigned long idd_bar0; + struct pci_dev *idd_pdev; + const struct pci_device_id *idd_pci_id; + struct __iomem ioc4_misc_regs *idd_misc_regs; + void *idd_serial_data; +}; + +/* One per submodule */ +struct ioc4_submodule { + struct list_head is_list; + char *is_name; + struct module *is_owner; + int (*is_probe) (struct ioc4_driver_data *); + int (*is_remove) (struct ioc4_driver_data *); +}; + +#define IOC4_NUM_CARDS 8 /* max cards per partition */ + +/********************************** + * Functions needed by submodules * + **********************************/ + +extern int ioc4_register_submodule(struct ioc4_submodule *); +extern void ioc4_unregister_submodule(struct ioc4_submodule *); + +#endif /* _LINUX_IOC4_H */ diff --git a/include/linux/ioc4_common.h b/include/linux/ioc4_common.h deleted file mode 100644 index b03bcc46df55..000000000000 --- a/include/linux/ioc4_common.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. - */ - -#ifndef _LINUX_IOC4_COMMON_H -#define _LINUX_IOC4_COMMON_H - -/* prototypes */ - -int ioc4_serial_init(void); - -int ioc4_serial_attach_one(struct pci_dev *pdev, const struct - pci_device_id *pci_id); -int ioc4_ide_attach_one(struct pci_dev *pdev, const struct - pci_device_id *pci_id); - -#endif /* _LINUX_IOC4_COMMON_H */ -- cgit v1.2.1 From d4c477ca5448f19afaaf6c0cfd655009ea9e614d Mon Sep 17 00:00:00 2001 From: Brent Casavant Date: Tue, 21 Jun 2005 17:16:01 -0700 Subject: [PATCH] ioc4: PCI bus speed detection Several hardware features of SGI's IOC4 I/O controller chip require timing-related driver calculations dependent upon the PCI bus speed. This patch enables the core IOC4 driver code to detect the actual bus speed and store a value that can later be used by the IOC4 subdrivers as needed. Signed-off-by: Brent Casavant Acked-by: Pat Gefre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioc4.h | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h index 729bfa4c4ac5..3dd18b785ebd 100644 --- a/include/linux/ioc4.h +++ b/include/linux/ioc4.h @@ -11,6 +11,14 @@ #include +/*************** + * Definitions * + ***************/ + +/* Miscellaneous values inherent to hardware */ + +#define IOC4_EXTINT_COUNT_DIVISOR 520 /* PCI clocks per COUNT tick */ + /*********************************** * Structures needed by subdrivers * ***********************************/ @@ -119,19 +127,34 @@ struct ioc4_misc_regs { } gppr[8]; /* Generic PIO pins */ }; -/* One of these per IOC4 - * - * The idd_serial_data field is present here, even though it's used - * solely by the serial subdriver, because the main IOC4 module - * properly owns pci_{get,set}_drvdata functionality. This field - * allows that subdriver to stash its own drvdata somewhere. - */ +/* Masks for GPCR DIR pins */ +#define IOC4_GPCR_DIR_0 0x01 /* External interrupt output */ +#define IOC4_GPCR_DIR_1 0x02 /* External interrupt input */ +#define IOC4_GPCR_DIR_2 0x04 +#define IOC4_GPCR_DIR_3 0x08 /* Keyboard/mouse presence */ +#define IOC4_GPCR_DIR_4 0x10 /* Ser. port 0 xcvr select (0=232, 1=422) */ +#define IOC4_GPCR_DIR_5 0x20 /* Ser. port 1 xcvr select (0=232, 1=422) */ +#define IOC4_GPCR_DIR_6 0x40 /* Ser. port 2 xcvr select (0=232, 1=422) */ +#define IOC4_GPCR_DIR_7 0x80 /* Ser. port 3 xcvr select (0=232, 1=422) */ + +/* Masks for GPCR EDGE pins */ +#define IOC4_GPCR_EDGE_0 0x01 +#define IOC4_GPCR_EDGE_1 0x02 /* External interrupt input */ +#define IOC4_GPCR_EDGE_2 0x04 +#define IOC4_GPCR_EDGE_3 0x08 +#define IOC4_GPCR_EDGE_4 0x10 +#define IOC4_GPCR_EDGE_5 0x20 +#define IOC4_GPCR_EDGE_6 0x40 +#define IOC4_GPCR_EDGE_7 0x80 + +/* One of these per IOC4 */ struct ioc4_driver_data { struct list_head idd_list; unsigned long idd_bar0; struct pci_dev *idd_pdev; const struct pci_device_id *idd_pci_id; struct __iomem ioc4_misc_regs *idd_misc_regs; + unsigned long count_period; void *idd_serial_data; }; -- cgit v1.2.1 From dbce706e2550253c5ab6043f4f5dfde0cd02470f Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Tue, 21 Jun 2005 17:16:19 -0700 Subject: [PATCH] uml: add and use generic hw_controller_type->release With Chris Wedgwood Currently UML must explicitly call the UML-specific free_irq_by_irq_and_dev() for each free_irq call it's done. This is needed because ->shutdown and/or ->disable are only called when the last "action" for that irq is removed. Instead, for UML shared IRQs (UML IRQs are very often, if not always, shared), for each dev_id some setup is done, which must be cleared on the release of that fd. For instance, for each open console a new instance (i.e. new dev_id) of the same IRQ is requested(). Exactly, a fd is stored in an array (pollfds), which is after read by a host thread and passed to poll(). Each event registered by poll() triggers an interrupt. So, for each free_irq() we must remove the corresponding host fd from the table, which we do via this -release() method. In this patch we add an appropriate hook for this, and remove all uses of it by pointing the hook to the said procedure; this is safe to do since the said procedure. Also some cosmetic improvements are included. This is heavily based on some work by Chris Wedgwood, which however didn't get the patch merged for something I'd call a "misunderstanding" (the need for this patch wasn't cleanly explained, thus adding the generic hook was felt as undesirable). Signed-off-by: Paolo 'Blaisorblade' Giarrusso CC: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c3ff4d101667..b68ad80e2464 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -47,6 +47,7 @@ struct hw_interrupt_type { void (*ack)(unsigned int irq); void (*end)(unsigned int irq); void (*set_affinity)(unsigned int irq, cpumask_t dest); + void (*release)(unsigned int irq, void *dev_id); }; typedef struct hw_interrupt_type hw_irq_controller; -- cgit v1.2.1 From b77d6adc922b8bbf8b16b67f567958c42962cf88 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Tue, 21 Jun 2005 17:16:24 -0700 Subject: [PATCH] uml: make hw_controller_type->release exist only for archs needing it With Chris Wedgwood As suggested by Chris, we can make the "just added" method ->release conditional to UML only (better: to archs requesting it, i.e. only UML currently), so that other archs don't get this unneeded crud, and if UML won't need it any more we can kill this. Signed-off-by: Paolo 'Blaisorblade' Giarrusso CC: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index b68ad80e2464..7fc1022be9ee 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -47,7 +47,10 @@ struct hw_interrupt_type { void (*ack)(unsigned int irq); void (*end)(unsigned int irq); void (*set_affinity)(unsigned int irq, cpumask_t dest); + /* Currently used only by UML, might disappear one day.*/ +#ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); +#endif }; typedef struct hw_interrupt_type hw_irq_controller; -- cgit v1.2.1 From 8a96619145840c6eb50d85759f72d9337db411f7 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 21 Jun 2005 17:16:41 -0700 Subject: [PATCH] autofs4: subversion bump to identify these changes Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index a1657fb99516..9343c89d843c 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 4 -#define AUTOFS_PROTO_SUBVERSION 6 +#define AUTOFS_PROTO_SUBVERSION 7 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 -- cgit v1.2.1 From f5a9951c94e7a285a3d00648e3d790a7f016bd11 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Tue, 21 Jun 2005 17:16:58 -0700 Subject: [PATCH] fbdev: iomove removal Since no one is using the inbuf, outbuf of struct fb_pixmap I removed their use in the framebuffer console. The idea is instead move the pixmap functionality below the accelerated functions intead of on top as the way it is now. If there is no objection please apply. This is against Linus latestr GIT tree. Thank you. Signed-off-by: James Simmons Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index b468bf496547..e14942805dbd 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -524,11 +524,11 @@ struct fb_pixmap { u32 offset; /* current offset to buffer */ u32 buf_align; /* byte alignment of each bitmap */ u32 scan_align; /* alignment per scanline */ - u32 access_align; /* alignment per read/write */ + u32 access_align; /* alignment per read/write (bits) */ u32 flags; /* see FB_PIXMAP_* */ /* access methods */ - void (*outbuf)(struct fb_info *info, u8 *addr, u8 *src, unsigned int size); - u8 (*inbuf) (struct fb_info *info, u8 *addr); + void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size); + void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size); }; @@ -816,12 +816,6 @@ extern int unregister_framebuffer(struct fb_info *fb_info); extern int fb_prepare_logo(struct fb_info *fb_info); extern int fb_show_logo(struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); -extern void fb_iomove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf, - u8 *dst, u32 d_pitch, u8 *src, u32 idx, - u32 height, u32 shift_high, u32 shift_low, u32 mod); -extern void fb_iomove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf, - u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, - u32 height); extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf, u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); -- cgit v1.2.1 From 1154ea7dcd8eed758fb5ec47393a79d5a1f0bc43 Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Tue, 21 Jun 2005 17:17:04 -0700 Subject: [PATCH] Framebuffer driver for Arc LCD board Add support for the Arc monochrome LCD board. The board uses KS108 controllers to drive individual 64x64 LCD matrices. The board can be paneled in a variety of setups such as 2x1=128x64, 4x4=256x256 and so on. The board/host interface is through GPIO. Signed-off-by: Jaya Kumar Cc: "Antonino A. Daplas" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/arcfb.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/linux/arcfb.h (limited to 'include/linux') diff --git a/include/linux/arcfb.h b/include/linux/arcfb.h new file mode 100644 index 000000000000..721e7654daeb --- /dev/null +++ b/include/linux/arcfb.h @@ -0,0 +1,8 @@ +#ifndef __LINUX_ARCFB_H__ +#define __LINUX_ARCFB_H__ + +#define FBIO_WAITEVENT _IO('F', 0x88) +#define FBIO_GETCONTROL2 _IOR('F', 0x89, size_t) + +#endif + -- cgit v1.2.1 From d5881eb4883ef7dd28a4dcea237714817c4b2f8e Mon Sep 17 00:00:00 2001 From: James Simmons Date: Tue, 21 Jun 2005 17:17:05 -0700 Subject: [PATCH] fbdev: new pci id for chipsfb Patch adds pci ID for CT 69000 chipset. Signed-off-by: James Simmons Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b8b4ebf9abf1..63e89e47b8e9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -575,6 +575,7 @@ #define PCI_DEVICE_ID_CT_65550 0x00e0 #define PCI_DEVICE_ID_CT_65554 0x00e4 #define PCI_DEVICE_ID_CT_65555 0x00e5 +#define PCI_DEVICE_ID_CT_69000 0x00c0 #define PCI_VENDOR_ID_MIRO 0x1031 #define PCI_DEVICE_ID_MIRO_36050 0x5601 -- cgit v1.2.1 From 303b86d9913eca0cbfc3c5cb41e7006f6e13b755 Mon Sep 17 00:00:00 2001 From: Jurriaan Date: Tue, 21 Jun 2005 17:17:06 -0700 Subject: [PATCH] New framebuffer fonts + updated 12x22 font available Improve the fonts for use with the framebuffer. I've added all the characters marked 'FIXME' in the sun12x22 font and created a 10x18 font (based on the sun12x22 font) and a 7x14 font (based on the vga8x16 font). This patch is non-intrusive, no options are enabled by default so most users won't notice a thing. I am placing my changes under the GPL, however, I've not seen any copyright notices on the sun12x22 font and the vga8x16 font which I derived my new fonts from so I don't know what the copyright status is. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/font.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index fc2d690c9d5f..8fc80a7d78ac 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -25,19 +25,23 @@ struct font_desc { #define VGA8x16_IDX 1 #define PEARL8x8_IDX 2 #define VGA6x11_IDX 3 -#define SUN8x16_IDX 4 -#define SUN12x22_IDX 5 -#define ACORN8x8_IDX 6 -#define MINI4x6_IDX 7 +#define FONT7x14_IDX 4 +#define FONT10x18_IDX 5 +#define SUN8x16_IDX 6 +#define SUN12x22_IDX 7 +#define ACORN8x8_IDX 8 +#define MINI4x6_IDX 9 extern struct font_desc font_vga_8x8, - font_vga_8x16, - font_pearl_8x8, - font_vga_6x11, - font_sun_8x16, - font_sun_12x22, - font_acorn_8x8, - font_mini_4x6; + font_vga_8x16, + font_pearl_8x8, + font_vga_6x11, + font_7x14, + font_10x18, + font_sun_8x16, + font_sun_12x22, + font_acorn_8x8, + font_mini_4x6; /* Find a font with a specific name */ -- cgit v1.2.1 From f1ab5dac251bb4514607918b0019a3b3f5f5fb48 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Tue, 21 Jun 2005 17:17:07 -0700 Subject: [PATCH] fbdev: stack reduction Shrink the stack when calling the drawing alignment functions. Signed-off-by: James Simmons Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index e14942805dbd..bc24beeed971 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -816,12 +816,9 @@ extern int unregister_framebuffer(struct fb_info *fb_info); extern int fb_prepare_logo(struct fb_info *fb_info); extern int fb_show_logo(struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); -extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf, - u8 *dst, u32 d_pitch, u8 *src, u32 idx, +extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); -extern void fb_sysmove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf, - u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, - u32 height); +extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height); extern void fb_set_suspend(struct fb_info *info, int state); extern int fb_get_color_depth(struct fb_var_screeninfo *var); extern int fb_get_options(char *name, char **option); -- cgit v1.2.1 From 06d91a5fe0b50c9060e70bdf7786f8a3c66249db Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:12 -0700 Subject: [PATCH] md: improve locking on 'safemode' and move superblock writes When md marks the superblock dirty before a write, it calls generic_make_request (to write the superblock) from within generic_make_request (to write the first dirty block), which could cause problems later. With this patch, the superblock write is always done by the helper thread, and write request are delayed until that write completes. Also, the locking around marking the array dirty and writing the superblock is improved to avoid possible races. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md.h | 2 +- include/linux/raid/md_k.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index a6a67d102bfa..cfde8f497d6d 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), extern void md_unregister_thread (mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); -extern void md_write_start(mddev_t *mddev); +extern int md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index c9a0d4013be7..d92db54255a3 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -15,6 +15,9 @@ #ifndef _MD_K_H #define _MD_K_H +/* and dm-bio-list.h is not under include/linux because.... ??? */ +#include "../../../drivers/md/dm-bio-list.h" + #define MD_RESERVED 0UL #define LINEAR 1UL #define RAID0 2UL @@ -252,6 +255,10 @@ struct mddev_s atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; + + spinlock_t write_lock; + struct bio_list write_list; + unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. */ -- cgit v1.2.1 From 57afd89f98a990747445f01c458ecae64263b2f8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:13 -0700 Subject: [PATCH] md: improve the interface to sync_request 1/ change the return value (which is number-of-sectors synced) from 'int' to 'sector_t'. The number of sectors is usually easily small enough to fit in an int, but if resync needs to abort, it may want to return the total number of remaining sectors, which could be large. Also errors cannot be returned as negative numbers now, so use 0 instead 2/ Add a 'skipped' return parameter to allow the array to report that it skipped the sectors. This allows md to take this into account in the speed calculations. Currently there is no important skipping, but the bitmap-based-resync that is coming will use this. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index d92db54255a3..bce0032decff 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -298,7 +298,7 @@ struct mdk_personality_s int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); int (*hot_remove_disk) (mddev_t *mddev, int number); int (*spare_active) (mddev_t *mddev); - int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster); + sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); int (*resize) (mddev_t *mddev, sector_t sectors); int (*reshape) (mddev_t *mddev, int raid_disks); int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); -- cgit v1.2.1 From 32a7627cf3a35396a8e834faf34e38ae9f3b1309 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:14 -0700 Subject: [PATCH] md: optimised resync using Bitmap based intent logging With this patch, the intent to write to some block in the array can be logged to a bitmap file. Each bit represents some number of sectors and is set before any update happens, and only cleared when all writes relating to all sectors are complete. After an unclean shutdown, information in this bitmap can be used to optimise resync - only sectors which could be out-of-sync need to be updated. Also if a drive is removed and then added back into an array, the recovery can make use of the bitmap to optimise reconstruction. This is not implemented in this patch. Currently the bitmap is stored in a file which must (obviously) be stored on a separate device. The patch only provided infrastructure. It does not update any personalities to bitmap intent logging. Md arrays can still be used with no bitmap file. This patch has minimal impact on such arrays. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 280 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/raid/md_k.h | 4 + include/linux/raid/md_u.h | 7 ++ 3 files changed, 291 insertions(+) create mode 100644 include/linux/raid/bitmap.h (limited to 'include/linux') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h new file mode 100644 index 000000000000..f785cf26cbad --- /dev/null +++ b/include/linux/raid/bitmap.h @@ -0,0 +1,280 @@ +/* + * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 + * + * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. + */ +#ifndef BITMAP_H +#define BITMAP_H 1 + +#define BITMAP_MAJOR 3 +#define BITMAP_MINOR 38 + +/* + * in-memory bitmap: + * + * Use 16 bit block counters to track pending writes to each "chunk". + * The 2 high order bits are special-purpose, the first is a flag indicating + * whether a resync is needed. The second is a flag indicating whether a + * resync is active. + * This means that the counter is actually 14 bits: + * + * +--------+--------+------------------------------------------------+ + * | resync | resync | counter | + * | needed | active | | + * | (0-1) | (0-1) | (0-16383) | + * +--------+--------+------------------------------------------------+ + * + * The "resync needed" bit is set when: + * a '1' bit is read from storage at startup. + * a write request fails on some drives + * a resync is aborted on a chunk with 'resync active' set + * It is cleared (and resync-active set) when a resync starts across all drives + * of the chunk. + * + * + * The "resync active" bit is set when: + * a resync is started on all drives, and resync_needed is set. + * resync_needed will be cleared (as long as resync_active wasn't already set). + * It is cleared when a resync completes. + * + * The counter counts pending write requests, plus the on-disk bit. + * When the counter is '1' and the resync bits are clear, the on-disk + * bit can be cleared aswell, thus setting the counter to 0. + * When we set a bit, or in the counter (to start a write), if the fields is + * 0, we first set the disk bit and set the counter to 1. + * + * If the counter is 0, the on-disk bit is clear and the stipe is clean + * Anything that dirties the stipe pushes the counter to 2 (at least) + * and sets the on-disk bit (lazily). + * If a periodic sweep find the counter at 2, it is decremented to 1. + * If the sweep find the counter at 1, the on-disk bit is cleared and the + * counter goes to zero. + * + * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block + * counters as a fallback when "page" memory cannot be allocated: + * + * Normal case (page memory allocated): + * + * page pointer (32-bit) + * + * [ ] ------+ + * | + * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) + * c1 c2 c2048 + * + * Hijacked case (page memory allocation failed): + * + * hijacked page pointer (32-bit) + * + * [ ][ ] (no page memory allocated) + * counter #1 (16-bit) counter #2 (16-bit) + * + */ + +#ifdef __KERNEL__ + +#define PAGE_BITS (PAGE_SIZE << 3) +#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) + +typedef __u16 bitmap_counter_t; +#define COUNTER_BITS 16 +#define COUNTER_BIT_SHIFT 4 +#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) +#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) + +#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) +#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) +#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) +#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) +#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) +#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) + +/* how many counters per page? */ +#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) +/* same, except a shift value for more efficient bitops */ +#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) +/* same, except a mask value for more efficient bitops */ +#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) + +#define BITMAP_BLOCK_SIZE 512 +#define BITMAP_BLOCK_SHIFT 9 + +/* how many blocks per chunk? (this is variable) */ +#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) +#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) +#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) + +/* when hijacked, the counters and bits represent even larger "chunks" */ +/* there will be 1024 chunks represented by each counter in the page pointers */ +#define PAGEPTR_BLOCK_RATIO(bitmap) \ + (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) +#define PAGEPTR_BLOCK_SHIFT(bitmap) \ + (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) +#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) + +/* + * on-disk bitmap: + * + * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap + * file a page at a time. There's a superblock at the start of the file. + */ + +/* map chunks (bits) to file pages - offset by the size of the superblock */ +#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) + +#endif + +/* + * bitmap structures: + */ + +#define BITMAP_MAGIC 0x6d746962 + +/* use these for bitmap->flags and bitmap->sb->state bit-fields */ +enum bitmap_state { + BITMAP_ACTIVE = 0x001, /* the bitmap is in use */ + BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */ +}; + +/* the superblock at the front of the bitmap file -- little endian */ +typedef struct bitmap_super_s { + __u32 magic; /* 0 BITMAP_MAGIC */ + __u32 version; /* 4 the bitmap major for now, could change... */ + __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ + __u64 events; /* 24 event counter for the bitmap (1)*/ + __u64 events_cleared;/*32 event counter when last bit cleared (2) */ + __u64 sync_size; /* 40 the size of the md device's sync range(3) */ + __u32 state; /* 48 bitmap state information */ + __u32 chunksize; /* 52 the bitmap chunk size in bytes */ + __u32 daemon_sleep; /* 56 seconds between disk flushes */ + + __u8 pad[256 - 60]; /* set to zero */ +} bitmap_super_t; + +/* notes: + * (1) This event counter is updated before the eventcounter in the md superblock + * When a bitmap is loaded, it is only accepted if this event counter is equal + * to, or one greater than, the event counter in the superblock. + * (2) This event counter is updated when the other one is *if*and*only*if* the + * array is not degraded. As bits are not cleared when the array is degraded, + * this represents the last time that any bits were cleared. + * If a device is being added that has an event count with this value or + * higher, it is accepted as conforming to the bitmap. + * (3)This is the number of sectors represented by the bitmap, and is the range that + * resync happens across. For raid1 and raid5/6 it is the size of individual + * devices. For raid10 it is the size of the array. + */ + +#ifdef __KERNEL__ + +/* the in-memory bitmap is represented by bitmap_pages */ +struct bitmap_page { + /* + * map points to the actual memory page + */ + char *map; + /* + * in emergencies (when map cannot be alloced), hijack the map + * pointer and use it as two counters itself + */ + unsigned int hijacked:1; + /* + * count of dirty bits on the page + */ + unsigned int count:31; +}; + +/* keep track of bitmap file pages that have pending writes on them */ +struct page_list { + struct list_head list; + struct page *page; +}; + +/* the main bitmap structure - one per mddev */ +struct bitmap { + struct bitmap_page *bp; + unsigned long pages; /* total number of pages in the bitmap */ + unsigned long missing_pages; /* number of pages not yet allocated */ + + mddev_t *mddev; /* the md device that the bitmap is for */ + + int counter_bits; /* how many bits per block counter */ + + /* bitmap chunksize -- how much data does each bit represent? */ + unsigned long chunksize; + unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ + unsigned long chunks; /* total number of data chunks for the array */ + + /* We hold a count on the chunk currently being synced, and drop + * it when the last block is started. If the resync is aborted + * midway, we need to be able to drop that count, so we remember + * the counted chunk.. + */ + unsigned long syncchunk; + + __u64 events_cleared; + + /* bitmap spinlock */ + spinlock_t lock; + + struct file *file; /* backing disk file */ + struct page *sb_page; /* cached copy of the bitmap file superblock */ + struct page **filemap; /* list of cache pages for the file */ + unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ + unsigned long file_pages; /* number of pages in the file */ + + unsigned long flags; + + /* + * the bitmap daemon - periodically wakes up and sweeps the bitmap + * file, cleaning up bits and flushing out pages to disk as necessary + */ + unsigned long daemon_lastrun; /* jiffies of last run */ + unsigned long daemon_sleep; /* how many seconds between updates? */ + + /* + * bitmap write daemon - this daemon performs writes to the bitmap file + * this thread is only needed because of a limitation in ext3 (jbd) + * that does not allow a task to have two journal transactions ongoing + * simultaneously (even if the transactions are for two different + * filesystems) -- in the case of bitmap, that would be the filesystem + * that the bitmap file resides on and the filesystem that is mounted + * on the md device -- see current->journal_info in jbd/transaction.c + */ + mdk_thread_t *writeback_daemon; + spinlock_t write_lock; + struct semaphore write_ready; + struct semaphore write_done; + unsigned long writes_pending; + wait_queue_head_t write_wait; + struct list_head write_pages; + struct list_head complete_pages; + mempool_t *write_pool; +}; + +/* the bitmap API */ + +/* these are used only by md/bitmap */ +int bitmap_create(mddev_t *mddev); +void bitmap_destroy(mddev_t *mddev); +int bitmap_active(struct bitmap *bitmap); + +char *file_path(struct file *file, char *buf, int count); +void bitmap_print_sb(struct bitmap *bitmap); +int bitmap_update_sb(struct bitmap *bitmap); + +int bitmap_setallbits(struct bitmap *bitmap); + +/* these are exported */ +int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors); +void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, + int success); +int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks); +void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); +void bitmap_close_sync(struct bitmap *bitmap); + +int bitmap_unplug(struct bitmap *bitmap); +int bitmap_daemon_work(struct bitmap *bitmap); +#endif + +#endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index bce0032decff..16e94a9f0f8c 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -267,6 +267,9 @@ struct mddev_s atomic_t writes_pending; request_queue_t *queue; /* for plugging ... */ + struct bitmap *bitmap; /* the bitmap for the device */ + struct file *bitmap_file; /* the bitmap file */ + struct list_head all_mddevs; }; @@ -341,6 +344,7 @@ typedef struct mdk_thread_s { unsigned long flags; struct completion *event; struct task_struct *tsk; + unsigned long timeout; const char *name; } mdk_thread_t; diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index a2df5c2a42af..81da20ccec4d 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -23,6 +23,7 @@ #define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) #define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) #define RAID_AUTORUN _IO (MD_MAJOR, 0x14) +#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) /* configuration */ #define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) @@ -36,6 +37,7 @@ #define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) #define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) #define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a) +#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) /* usage */ #define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) @@ -106,6 +108,11 @@ typedef struct mdu_start_info_s { } mdu_start_info_t; +typedef struct mdu_bitmap_file_s +{ + char pathname[4096]; +} mdu_bitmap_file_t; + typedef struct mdu_param_s { int personality; /* 1,2,3,4 */ -- cgit v1.2.1 From 77ad4bc706fe6c52ab953f31c287a6af712d080c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:21 -0700 Subject: [PATCH] md: enable the bitmap write-back daemon and wait for it. Currently we don't wait for updates to the bitmap to be flushed to disk properly. The infrastructure all there, but it isn't being used.... A separate kernel thread (bitmap_writeback_daemon) is needed to wait for each page as we cannot get callbacks when a page write completes. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index f785cf26cbad..cfe60cfc8f3d 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -233,21 +233,12 @@ struct bitmap { unsigned long daemon_sleep; /* how many seconds between updates? */ /* - * bitmap write daemon - this daemon performs writes to the bitmap file - * this thread is only needed because of a limitation in ext3 (jbd) - * that does not allow a task to have two journal transactions ongoing - * simultaneously (even if the transactions are for two different - * filesystems) -- in the case of bitmap, that would be the filesystem - * that the bitmap file resides on and the filesystem that is mounted - * on the md device -- see current->journal_info in jbd/transaction.c + * bitmap_writeback_daemon waits for file-pages that have been written, + * as there is no way to get a call-back when a page write completes. */ mdk_thread_t *writeback_daemon; spinlock_t write_lock; - struct semaphore write_ready; - struct semaphore write_done; - unsigned long writes_pending; wait_queue_head_t write_wait; - struct list_head write_pages; struct list_head complete_pages; mempool_t *write_pool; }; -- cgit v1.2.1 From 191ea9b2c7cc3ebbe0678834ab710d7d95ad3f9a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:23 -0700 Subject: [PATCH] md: raid1 support for bitmap intent logging Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/raid1.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h index abbfdd9afe1e..9d93cf12e890 100644 --- a/include/linux/raid/raid1.h +++ b/include/linux/raid/raid1.h @@ -36,12 +36,21 @@ struct r1_private_data_s { spinlock_t device_lock; struct list_head retry_list; + /* queue pending writes and submit them on unplug */ + struct bio_list pending_bio_list; + /* queue of writes that have been unplugged */ + struct bio_list flushing_bio_list; + /* for use when syncing mirrors: */ spinlock_t resync_lock; - int nr_pending; - int barrier; + int nr_pending; + int barrier; sector_t next_resync; + int fullsync; /* set to 1 if a full sync is needed, + * (fresh device added). + * Cleared when a sync completes. + */ wait_queue_head_t wait_idle; wait_queue_head_t wait_resume; @@ -85,14 +94,17 @@ struct r1bio_s { int read_disk; struct list_head retry_list; + struct bitmap_update *bitmap_update; /* * if the IO is in WRITE direction, then multiple bios are used. * We choose the number when they are allocated. */ struct bio *bios[0]; + /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ }; /* bits for r1bio.state */ #define R1BIO_Uptodate 0 #define R1BIO_IsSync 1 +#define R1BIO_Degraded 2 #endif -- cgit v1.2.1 From 41158c7eb22312cfaa256744e1553bb4042ff085 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:25 -0700 Subject: [PATCH] md: optimise reconstruction when re-adding a recently failed drive. When an array is degraded, bit in the intent-bitmap are never cleared. So if a recently failed drive is re-added, we only need to reconstruct the block that are still reflected in the bitmap. This patch adds support for this re-adding. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 16e94a9f0f8c..6cdcb4434c6c 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -183,6 +183,10 @@ struct mdk_rdev_s int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ + int saved_raid_disk; /* role that device used to have in the + * array and could again if we did a partial + * resync from the bitmap + */ atomic_t nr_pending; /* number of pending requests. * only maintained for arrays that -- cgit v1.2.1 From 3d310eb7b3df1252e8595d059d982b0a9825a137 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:26 -0700 Subject: [PATCH] md: fix deadlock due to md thread processing delayed requests. Before completing a 'write' the md superblock might need to be updated. This is best done by the md_thread. The current code schedules this up and queues the write request for later handling by the md_thread. However some personalities (Raid5/raid6) will deadlock if the md_thread tries to submit requests to its own array. So this patch changes things so the processes submitting the request waits for the superblock to be written and then submits the request itself. This fixes a recently-created deadlock in raid5/raid6 Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md.h | 2 +- include/linux/raid/md_k.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index cfde8f497d6d..75f41d8faed2 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), extern void md_unregister_thread (mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); -extern int md_write_start(mddev_t *mddev, struct bio *bi); +extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 6cdcb4434c6c..3e977025cf43 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -261,7 +261,7 @@ struct mddev_s sector_t recovery_cp; spinlock_t write_lock; - struct bio_list write_list; + wait_queue_head_t sb_wait; /* for waiting on superblock updates */ unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. -- cgit v1.2.1 From a654b9d8f851f4ca02649d5825cbe6c608adb10c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:27 -0700 Subject: [PATCH] md: allow md intent bitmap to be stored near the superblock. This provides an alternate to storing the bitmap in a separate file. The bitmap can be stored at a given offset from the superblock. Obviously the creator of the array must make sure this doesn't intersect with data.... After is good for version-0.90 superblocks. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 2 ++ include/linux/raid/md.h | 15 ++++++++++++++- include/linux/raid/md_k.h | 4 ++++ include/linux/raid/md_p.h | 7 ++++++- 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index cfe60cfc8f3d..e24b74b11150 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -217,6 +217,7 @@ struct bitmap { /* bitmap spinlock */ spinlock_t lock; + long offset; /* offset from superblock if file is NULL */ struct file *file; /* backing disk file */ struct page *sb_page; /* cached copy of the bitmap file superblock */ struct page **filemap; /* list of cache pages for the file */ @@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap); int bitmap_update_sb(struct bitmap *bitmap); int bitmap_setallbits(struct bitmap *bitmap); +void bitmap_write_all(struct bitmap *bitmap); /* these are exported */ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors); diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 75f41d8faed2..ffa316ce4dc8 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -60,7 +60,14 @@ */ #define MD_MAJOR_VERSION 0 #define MD_MINOR_VERSION 90 -#define MD_PATCHLEVEL_VERSION 1 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + */ +#define MD_PATCHLEVEL_VERSION 2 extern int register_md_personality (int p_num, mdk_personality_t *p); extern int unregister_md_personality (int p_num); @@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev); extern void md_print_devices (void); +extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, + sector_t sector, int size, struct page *page); +extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, + struct page *page, int rw); + + #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 3e977025cf43..a3725b57fb7d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -273,6 +273,10 @@ struct mddev_s struct bitmap *bitmap; /* the bitmap for the device */ struct file *bitmap_file; /* the bitmap file */ + long bitmap_offset; /* offset from superblock of + * start of bitmap. May be + * negative, but not '0' + */ struct list_head all_mddevs; }; diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8ba95d67329f..8e592a25a8b5 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -96,6 +96,7 @@ typedef struct mdp_device_descriptor_s { #define MD_SB_CLEAN 0 #define MD_SB_ERRORS 1 +#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ typedef struct mdp_superblock_s { /* * Constant generic information @@ -184,7 +185,7 @@ struct mdp_superblock_1 { /* constant array information - 128 bytes */ __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ __u32 major_version; /* 1 */ - __u32 feature_map; /* 0 for now */ + __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ __u32 pad0; /* always set to 0 when writing */ __u8 set_uuid[16]; /* user-space generated. */ @@ -197,6 +198,10 @@ struct mdp_superblock_1 { __u32 chunksize; /* in 512byte sectors */ __u32 raid_disks; + __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ __u8 pad1[128-96]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ -- cgit v1.2.1 From 7bfa19f2748000d646dbdf8f48258cfe1d257b52 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:28 -0700 Subject: [PATCH] md: allow md to update multiple superblocks in parallel. currently, md updates all superblocks (one on each device) in series. It waits for one write to complete before starting the next. This isn't a big problem as superblock updates don't happen that often. However it is neater to do it in parallel, and if the drives in the array have gone to "sleep" after a period of idleness, then waking them is parallel is faster (and someone else should be worrying about power drain). Futher, we will need parallel superblock updates for a future patch which keeps the intent-logging bitmap near the superblock. Also remove the silly code that retired superblock updates 100 times. This simply never made sense. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index a3725b57fb7d..8c14ba565a45 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -262,6 +262,7 @@ struct mddev_s spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ + atomic_t pending_writes; /* number of active superblock writes */ unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. -- cgit v1.2.1 From 39730960d94306d7be414e8d54f4e5c071af1278 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:28 -0700 Subject: [PATCH] Two small fixes for md verion-1 superblocks. 1/ Must typecast int to (sector_t) before inverting or we might not invert enough bits. 2/ When "bitmap_offset" was added to mdp_superblock_1, we didn't increase the count of words-used (96 to 100). Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8e592a25a8b5..dc65cd435494 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -202,7 +202,7 @@ struct mdp_superblock_1 { * NOTE: signed, so bitmap can be before superblock * only meaningful of feature_map[0] is set. */ - __u8 pad1[128-96]; /* set to 0 when written */ + __u8 pad1[128-100]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ __u64 data_offset; /* sector start of data, often 0 */ -- cgit v1.2.1