diff options
author | Sage Weil <sage@newdream.net> | 2010-11-16 11:14:34 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-01-12 15:15:12 -0800 |
commit | 6c0f3af72cb1622a66962a1180c36ef8c41be8e2 (patch) | |
tree | 66e415bf31ea31a3e9360c0ce624fd20b6050c89 | |
parent | 3c0eee3fe6a3a1c745379547c7e7c904aa64f6d5 (diff) | |
download | blackbird-op-linux-6c0f3af72cb1622a66962a1180c36ef8c41be8e2.tar.gz blackbird-op-linux-6c0f3af72cb1622a66962a1180c36ef8c41be8e2.zip |
ceph: add dir_layout to inode
Add a ceph_dir_layout to the inode, and calculate dentry hash values based
on the parent directory's specified dir_hash function. This is needed
because the old default Linux dcache hash function is extremely weak and
leads to a poor distribution of files among dir fragments.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/dir.c | 20 | ||||
-rw-r--r-- | fs/ceph/export.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 16 | ||||
-rw-r--r-- | net/ceph/ceph_hash.c | 3 |
6 files changed, 41 insertions, 4 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d902948a90d8..562f9884a4d9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1216,6 +1216,26 @@ void ceph_dentry_lru_del(struct dentry *dn) } } +/* + * Return name hash for a given dentry. This is dependent on + * the parent directory's hash function. + */ +unsigned ceph_dentry_hash(struct dentry *dn) +{ + struct inode *dir = dn->d_parent->d_inode; + struct ceph_inode_info *dci = ceph_inode(dir); + + switch (dci->i_dir_layout.dl_dir_hash) { + case 0: /* for backward compat */ + case CEPH_STR_HASH_LINUX: + return dn->d_name.hash; + + default: + return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + dn->d_name.name, dn->d_name.len); + } +} + const struct file_operations ceph_dir_fops = { .read = ceph_read_dir, .readdir = ceph_readdir, diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 2297d9426992..e41056174bf8 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode); - cfh->parent_name_hash = parent->d_name.hash; + cfh->parent_name_hash = ceph_dentry_hash(parent); *max_len = connected_handle_length; type = 2; } else if (*max_len >= handle_length) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bf1286588f26..045283ce4413 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_release_count = 0; ci->i_symlink = NULL; + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); + ci->i_fragtree = RB_ROOT; mutex_init(&ci->i_fragtree_mutex); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7f01728a4657..6e0826695112 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -239,6 +239,7 @@ struct ceph_inode_info { unsigned i_ceph_flags; unsigned long i_release_count; + struct ceph_dir_layout i_dir_layout; struct ceph_file_layout 
i_layout; char *i_symlink; @@ -768,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn); extern void ceph_dentry_lru_touch(struct dentry *dn); extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_invalidate_dentry_lease(struct dentry *dentry); +extern unsigned ceph_dentry_hash(struct dentry *dn); /* * our d_ops vary depending on whether the inode is live, diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c3c74aef289d..09dcc0c2ffd5 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -43,6 +43,10 @@ #define CEPH_FEATURE_NOSRCADDR (1<<1) #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) #define CEPH_FEATURE_FLOCK (1<<3) +#define CEPH_FEATURE_SUBSCRIBE2 (1<<4) +#define CEPH_FEATURE_MONNAMES (1<<5) +#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) +#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* @@ -55,10 +59,10 @@ struct ceph_file_layout { __le32 fl_stripe_count; /* over this many objects */ __le32 fl_object_size; /* until objects are this big, then move to new objects */ - __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ + __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */ /* pg -> disk layout */ - __le32 fl_object_stripe_unit; /* for per-object parity, if any */ + __le32 fl_object_stripe_unit; /* UNUSED. 
for per-object parity, if any */ /* object -> pg layout */ __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ @@ -69,6 +73,12 @@ struct ceph_file_layout { int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); +struct ceph_dir_layout { + __u8 dl_dir_hash; /* see ceph_hash.h for ids */ + __u8 dl_unused1; + __u16 dl_unused2; + __u32 dl_unused3; +} __attribute__ ((packed)); /* crypto algorithms */ #define CEPH_CRYPTO_NONE 0x0 @@ -457,7 +467,7 @@ struct ceph_mds_reply_inode { struct ceph_timespec rctime; struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ } __attribute__ ((packed)); -/* followed by frag array, then symlink string, then xattr blob */ +/* followed by frag array, symlink string, dir layout, xattr blob */ /* reply_lease follows dname, and reply_inode */ struct ceph_mds_reply_lease { diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 815ef8826796..0a1b53bce76d 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -1,5 +1,6 @@ #include <linux/ceph/types.h> +#include <linux/module.h> /* * Robert Jenkin's hash function. @@ -104,6 +105,7 @@ unsigned ceph_str_hash(int type, const char *s, unsigned len) return -1; } } +EXPORT_SYMBOL(ceph_str_hash); const char *ceph_str_hash_name(int type) { @@ -116,3 +118,4 @@ const char *ceph_str_hash_name(int type) return "unknown"; } } +EXPORT_SYMBOL(ceph_str_hash_name); |