diff options
Diffstat (limited to 'fs/ext2')
-rw-r--r-- | fs/ext2/CHANGES | 157 | ||||
-rw-r--r-- | fs/ext2/Makefile | 12 | ||||
-rw-r--r-- | fs/ext2/acl.c | 518 | ||||
-rw-r--r-- | fs/ext2/acl.h | 82 | ||||
-rw-r--r-- | fs/ext2/balloc.c | 699 | ||||
-rw-r--r-- | fs/ext2/bitmap.c | 25 | ||||
-rw-r--r-- | fs/ext2/dir.c | 673 | ||||
-rw-r--r-- | fs/ext2/ext2.h | 160 | ||||
-rw-r--r-- | fs/ext2/file.c | 68 | ||||
-rw-r--r-- | fs/ext2/fsync.c | 51 | ||||
-rw-r--r-- | fs/ext2/ialloc.c | 735 | ||||
-rw-r--r-- | fs/ext2/inode.c | 1276 | ||||
-rw-r--r-- | fs/ext2/ioctl.c | 81 | ||||
-rw-r--r-- | fs/ext2/namei.c | 418 | ||||
-rw-r--r-- | fs/ext2/super.c | 1161 | ||||
-rw-r--r-- | fs/ext2/symlink.c | 52 | ||||
-rw-r--r-- | fs/ext2/xattr.c | 1043 | ||||
-rw-r--r-- | fs/ext2/xattr.h | 118 | ||||
-rw-r--r-- | fs/ext2/xattr_security.c | 53 | ||||
-rw-r--r-- | fs/ext2/xattr_trusted.c | 64 | ||||
-rw-r--r-- | fs/ext2/xattr_user.c | 77 |
21 files changed, 7523 insertions, 0 deletions
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES new file mode 100644 index 000000000000..aa5aaf0e5911 --- /dev/null +++ b/fs/ext2/CHANGES @@ -0,0 +1,157 @@ +Changes from version 0.5a to version 0.5b +========================================= + - Now that we have sysctl(), the immutable flag cannot be changed when + the system is running at security level > 0. + - Some cleanups in the code. + - More consistency checks on directories. + - The ext2.diff patch from Tom May <ftom@netcom.com> has been + integrated. This patch replaces expensive "/" and "%" with + cheap ">>" and "&" where possible. + +Changes from version 0.5 to version 0.5a +======================================== + - Zero the partial block following the end of the file when a file + is truncated. + - Dates updated in the copyright. + - More checks when the filesystem is mounted: the count of blocks, + fragments, and inodes per group is checked against the block size. + - The buffers used by the error routines are now static variables, to + avoid using space on the kernel stack, as requested by Linus. + - Some cleanups in the error messages (some versions of syslog contain + a bug which truncates an error message if it contains '\n'). + - Check that no data can be written to a file past the 2GB limit. + - The famous readdir() bug has been fixed by Stephen Tweedie. + - Added a revision level in the superblock. + - Full support for O_SYNC flag of the open system call. + - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes + ext2fs to consider user #uid like root for the reserved blocks. + `resgid' acts the same way with group #gid. New fields in the + superblock contain default values for resuid and resgid and can + be modified by tune2fs. + Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>. + - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs + to remove the blocks used for FS structures from the total block + count in statfs. With `minixdf', ext2fs mimics Minix behavior + in statfs (i.e. it returns the total number of blocks on the + partition). This is intended to make bde happy :-) + - New file attributes: + - Immutable files cannot be modified. Data cannot be written to + these files. They cannot be removed, renamed and new links cannot + be created. Even root cannot modify the files. He has to remove + the immutable attribute first. + - Append-only files: can only be written in append-mode when writing. + They cannot be removed, renamed and new links cannot be created. + Note: files may only be added to an append-only directory. + - No-dump files: the attribute is not used by the kernel. My port + of dump uses it to avoid backing up files which are not important. + - New check in ext2_check_dir_entry: the inode number is checked. + - Support for big file systems: the copy of the FS descriptor is now + dynamically allocated (previous versions used a fixed size array). + This allows to mount 2GB+ FS. + - Reorganization of the ext2_inode structure to allow other operating + systems to create specific fields if they use ext2fs as their native + file system. Currently, ext2fs is only implemented in Linux but + will soon be part of Gnu Hurd and of Masix. + +Changes from version 0.4b to version 0.5 +======================================== + - New superblock fields: s_lastcheck and s_checkinterval added + by Uwe Ohse <uwe@tirka.gun.de> to implement timedependent checks + of the file system + - Real random numbers for secure rm added by Pierre del Perugia + <delperug@gla.ecoledoc.ibp.fr> + - The mount warnings related to the state of a fs are not printed + if the fs is mounted read-only, idea by Nick Holloway + <alfie@dcs.warwick.ac.uk> + +Changes from version 0.4a to version 0.4b +========================================= + - Copyrights changed to include the name of my laboratory. + - Clean up of balloc.c and ialloc.c. + - More consistency checks. + - Block preallocation added by Stephen Tweedie. + - Direct reads of directories disallowed. + - Readahead implemented in readdir by Stephen Tweedie. + - Bugs in block and inodes allocation fixed. + - Readahead implemented in ext2_find_entry by Chip Salzenberg. + - New mount options: + `check=none|normal|strict' + `debug' + `errors=continue|remount-ro|panic' + `grpid', `bsdgroups' + `nocheck' + `nogrpid', `sysvgroups' + - truncate() now tries to deallocate contiguous blocks in a single call + to ext2_free_blocks(). + - lots of cosmetic changes. + +Changes from version 0.4 to version 0.4a +======================================== + - the `sync' option support is now complete. Version 0.4 was not + supporting it when truncating a file. I have tested the synchronous + writes and they work but they make the system very slow :-( I have + to work again on this to make it faster. + - when detecting an error on a mounted filesystem, version 0.4 used + to try to write a flag in the super block even if the filesystem had + been mounted read-only. This is fixed. + - the `sb=#' option now causes the kernel code to use the filesystem + descriptors located at block #+1. Version 0.4 used the superblock + backup located at block # but used the main copy of the descriptors. + - a new file attribute `S' is supported. This attribute causes + synchronous writes but is applied to a file not to the entire file + system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for + suggesting it). + - the directory cache is inhibited by default. The cache management + code seems to be buggy and I have to look at it carefully before + using it again. + - deleting a file with the `s' attribute (secure deletion) causes its + blocks to be overwritten with random values not with zeros (thanks to + Michael A. Griffith <grif@cs.ucr.edu> for suggesting it). + - lots of cosmetic changes have been made. + +Changes from version 0.3 to version 0.4 +======================================= + - Three new mount options are supported: `check', `sync' and `sb=#'. + `check' tells the kernel code to make more consistency checks + when the file system is mounted. Currently, the kernel code checks + that the blocks and inodes bitmaps are consistent with the free + blocks and inodes counts. More checks will be added in future + releases. + `sync' tells the kernel code to use synchronous writes when updating + an inode, a bitmap, a directory entry or an indirect block. This + can make the file system much slower but can be a big win for files + recovery in case of a crash (and we can now say to the BSD folks + that Linux also supports synchronous updates :-). + `sb=#' tells the kernel code to use an alternate super block instead + of its master copy. `#' is the number of the block (counted in + 1024 bytes blocks) which contains the alternate super block. + An ext2 file system typically contains backups of the super block + at blocks 8193, 16385, and so on. + - I have change the meaning of the valid flag used by e2fsck. it + now contains the state of the file system. If the kernel code + detects an inconsistency while the file system is mounted, it flags + it as erroneous and e2fsck will detect that on next run. + - The super block now contains a mount counter. This counter is + incremented each time the file system is mounted read/write. When + this counter becomes bigger than a maximal mount counts (also stored + in the super block), e2fsck checks the file system, even if it had + been unmounted cleanly, and resets this counter to 0. + - File attributes are now supported. One can associate a set of + attributes to a file. Three attributes are defined: + `c': the file is marked for automatic compression, + `s': the file is marked for secure deletion: when the file is + deleted, its blocks are zeroed and written back to the disk, + `u': the file is marked for undeletion: when the file is deleted, + its contents are saved to allow a future undeletion. + Currently, only the `s' attribute is implemented in the kernel + code. Support for the other attributes will be added in a future + release. + - a few bugs related to times updates have been fixed by Bruce + Evans and me. + - a bug related to the links count of deleted inodes has been fixed. + Previous versions used to keep the links count set to 1 when a file + was deleted. The new version now sets links_count to 0 when deleting + the last link. + - a race condition when deallocating an inode has been fixed by + Stephen Tweedie. + diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile new file mode 100644 index 000000000000..ee240a14e70f --- /dev/null +++ b/fs/ext2/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ext2-filesystem routines. +# + +obj-$(CONFIG_EXT2_FS) += ext2.o + +ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o + +ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o +ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c new file mode 100644 index 000000000000..8369ee8d28c4 --- /dev/null +++ b/fs/ext2/acl.c @@ -0,0 +1,518 @@ +/* + * linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +/* + * Convert from filesystem to in-memory representation. + */ +static struct posix_acl * +ext2_acl_from_disk(const void *value, size_t size) +{ + const char *end = (char *)value + size; + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(ext2_acl_header)) + return ERR_PTR(-EINVAL); + if (((ext2_acl_header *)value)->a_version != + cpu_to_le32(EXT2_ACL_VERSION)) + return ERR_PTR(-EINVAL); + value = (char *)value + sizeof(ext2_acl_header); + count = ext2_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n=0; n < count; n++) { + ext2_acl_entry *entry = + (ext2_acl_entry *)value; + if ((char *)value + sizeof(ext2_acl_entry_short) > end) + goto fail; + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext2_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext2_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +/* + * Convert from in-memory to filesystem representation. + */ +static void * +ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + ext2_acl_header *ext_acl; + char *e; + size_t n; + + *size = ext2_acl_size(acl->a_count); + ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) + + acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL); + if (!ext_acl) + return ERR_PTR(-ENOMEM); + ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext2_acl_header); + for (n=0; n < acl->a_count; n++) { + ext2_acl_entry *entry = (ext2_acl_entry *)e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = + cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext2_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext2_acl_entry_short); + break; + + default: + goto fail; + } + } + return (char *)ext_acl; + +fail: + kfree(ext_acl); + return ERR_PTR(-EINVAL); +} + +static inline struct posix_acl * +ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT2_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +/* + * inode->i_sem: don't care + */ +static struct posix_acl * +ext2_get_acl(struct inode *inode, int type) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return NULL; + + switch(type) { + case ACL_TYPE_ACCESS: + acl = ext2_iget_acl(inode, &ei->i_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext2_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); + } + retval = ext2_xattr_get(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ext2_xattr_get(inode, name_index, "", value, retval); + } + if (retval > 0) + acl = ext2_acl_from_disk(value, retval); + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; + else + acl = ERR_PTR(retval); + if (value) + kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ext2_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext2_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return acl; +} + +/* + * inode->i_sem: down + */ +static int +ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + int name_index; + void *value = NULL; + size_t size; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + + switch(type) { + case ACL_TYPE_ACCESS: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + mark_inode_dirty(inode); + if (error == 0) + acl = NULL; + } + } + break; + + case ACL_TYPE_DEFAULT: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + if (acl) { + value = ext2_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + error = ext2_xattr_set(inode, name_index, "", value, size, 0); + + if (value) + kfree(value); + if (!error) { + switch(type) { + case ACL_TYPE_ACCESS: + ext2_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext2_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return error; +} + +static int +ext2_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, ext2_check_acl); +} + +/* + * Initialize the ACLs of a new inode. Called from ext2_new_inode. + * + * dir->i_sem: down + * inode->i_sem: up (access to inode is still exclusive) + */ +int +ext2_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(dir->i_sb, POSIX_ACL)) { + acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext2_set_acl(inode, + ACL_TYPE_ACCESS, clone); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return error; +} + +/* + * Does chmod for an inode that may have an Access Control List. The + * inode->i_mode field must be updated to the desired value by the caller + * before calling this function. + * Returns 0 on success, or a negative error number. + * + * We change the ACL rather than storing some ACL entries in the file + * mode permission bits (which would be more efficient), because that + * would break once additional permissions (like ACL_APPEND, ACL_DELETE + * for directories) are added. There are no more bits available in the + * file mode. + * + * inode->i_sem: down + */ +int +ext2_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + return error; +} + +/* + * Extended attribut handlers + */ +static size_t +ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_size) + memcpy(list, XATTR_NAME_ACL_ACCESS, size); + return size; +} + +static size_t +ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_size) + memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + return size; +} + +static int +ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ext2_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int +ext2_xattr_get_acl_access(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int +ext2_xattr_get_acl_default(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int +ext2_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else + acl = NULL; + + error = ext2_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +static int +ext2_xattr_set_acl_access(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int +ext2_xattr_set_acl_default(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ext2_xattr_acl_access_handler = { + .prefix = XATTR_NAME_ACL_ACCESS, + .list = ext2_xattr_list_acl_access, + .get = ext2_xattr_get_acl_access, + .set = ext2_xattr_set_acl_access, +}; + +struct xattr_handler ext2_xattr_acl_default_handler = { + .prefix = XATTR_NAME_ACL_DEFAULT, + .list = ext2_xattr_list_acl_default, + .get = ext2_xattr_get_acl_default, + .set = ext2_xattr_set_acl_default, +}; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h new file mode 100644 index 000000000000..fed96ae81a7d --- /dev/null +++ b/fs/ext2/acl.h @@ -0,0 +1,82 @@ +/* + File: fs/ext2/acl.h + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/xattr_acl.h> + +#define EXT2_ACL_VERSION 0x0001 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext2_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext2_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext2_acl_header; + +static inline size_t ext2_acl_size(int count) +{ + if (count <= 4) { + return sizeof(ext2_acl_header) + + count * sizeof(ext2_acl_entry_short); + } else { + return sizeof(ext2_acl_header) + + 4 * sizeof(ext2_acl_entry_short) + + (count - 4) * sizeof(ext2_acl_entry); + } +} + +static inline int ext2_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext2_acl_header); + s = size - 4 * sizeof(ext2_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext2_acl_entry_short)) + return -1; + return size / sizeof(ext2_acl_entry_short); + } else { + if (s % sizeof(ext2_acl_entry)) + return -1; + return s / sizeof(ext2_acl_entry) + 4; + } +} + +#ifdef CONFIG_EXT2_FS_POSIX_ACL + +/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl + if the ACL has not been cached */ +#define EXT2_ACL_NOT_CACHED ((void *)-1) + +/* acl.c */ +extern int ext2_permission (struct inode *, int, struct nameidata *); +extern int ext2_acl_chmod (struct inode *); +extern int ext2_init_acl (struct inode *, struct inode *); + +#else +#include <linux/sched.h> +#define ext2_permission NULL +#define ext2_get_acl NULL +#define ext2_set_acl NULL + +static inline int +ext2_acl_chmod (struct inode *inode) +{ + return 0; +} + +static inline int ext2_init_acl (struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif + diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c new file mode 100644 index 000000000000..6591abef64d0 --- /dev/null +++ b/fs/ext2/balloc.c @@ -0,0 +1,699 @@ +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/config.h> +#include "ext2.h" +#include <linux/quotaops.h> +#include <linux/sched.h> +#include <linux/buffer_head.h> + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long offset; + struct ext2_group_desc * desc; + struct ext2_sb_info *sbi = EXT2_SB(sb); + + if (block_group >= sbi->s_groups_count) { + ext2_error (sb, "ext2_get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + return NULL; + } + + group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); + if (!sbi->s_group_desc[group_desc]) { + ext2_error (sb, "ext2_get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); + return NULL; + } + + desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sbi->s_group_desc[group_desc]; + return desc + offset; +} + +/* + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. + * + * Return buffer_head on success or NULL in case of failure. + */ +static struct buffer_head * +read_block_bitmap(struct super_block *sb, unsigned int block_group) +{ + struct ext2_group_desc * desc; + struct buffer_head * bh = NULL; + + desc = ext2_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; + bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (!bh) + ext2_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); +error_out: + return bh; +} + +/* + * Set sb->s_dirt here because the superblock was "logically" altered. We + * need to recalculate its free blocks count and flush it out. + */ +static int reserve_blocks(struct super_block *sb, int count) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned free_blocks; + unsigned root_blocks; + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(es->s_r_blocks_count); + + if (free_blocks < count) + count = free_blocks; + + if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + /* + * We are too close to reserve and we are not privileged. + * Can we allocate anything at all? + */ + if (free_blocks > root_blocks) + count = free_blocks - root_blocks; + else + return 0; + } + + percpu_counter_mod(&sbi->s_freeblocks_counter, -count); + sb->s_dirt = 1; + return count; +} + +static void release_blocks(struct super_block *sb, int count) +{ + if (count) { + struct ext2_sb_info *sbi = EXT2_SB(sb); + + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + sb->s_dirt = 1; + } +} + +static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) +{ + unsigned free_blocks; + + if (!desc->bg_free_blocks_count) + return 0; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + if (free_blocks < count) + count = free_blocks; + desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + mark_buffer_dirty(bh); + return count; +} + +static void group_release_blocks(struct super_block *sb, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) +{ + if (count) { + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned free_blocks; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + sb->s_dirt = 1; + mark_buffer_dirty(bh); + } +} + +/* Free given blocks, update quota and i_blocks field */ +void ext2_free_blocks (struct inode * inode, unsigned long block, + unsigned long count) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + unsigned long i; + unsigned long overflow; + struct super_block * sb = inode->i_sb; + struct ext2_sb_info * sbi = EXT2_SB(sb); + struct ext2_group_desc * desc; + struct ext2_super_block * es = sbi->s_es; + unsigned freed = 0, group_freed; + + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > le32_to_cpu(es->s_blocks_count)) { + ext2_error (sb, "ext2_free_blocks", + "Freeing blocks not in datazone - " + "block = %lu, count = %lu", block, count); + goto error_return; + } + + ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); + +do_more: + overflow = 0; + block_group = (block - le32_to_cpu(es->s_first_data_block)) / + EXT2_BLOCKS_PER_GROUP(sb); + bit = (block - le32_to_cpu(es->s_first_data_block)) % + EXT2_BLOCKS_PER_GROUP(sb); + /* + * Check to see if we are freeing blocks across a group + * boundary. + */ + if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) { + overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb); + count -= overflow; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + + desc = ext2_get_group_desc (sb, block_group, &bh2); + if (!desc) + goto error_return; + + if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || + in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || + in_range (block, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group) || + in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group)) + ext2_error (sb, "ext2_free_blocks", + "Freeing blocks in system zones - " + "Block = %lu, count = %lu", + block, count); + + for (i = 0, group_freed = 0; i < count; i++) { + if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit + i, bitmap_bh->b_data)) { + ext2_error(sb, __FUNCTION__, + "bit already cleared for block %lu", block + i); + } else { + group_freed++; + } + } + + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); + + group_release_blocks(sb, block_group, desc, bh2, group_freed); + freed += group_freed; + + if (overflow) { + block += count; + count = overflow; + goto do_more; + } +error_return: + brelse(bitmap_bh); + release_blocks(sb, freed); + DQUOT_FREE_BLOCK(inode, freed); +} + +static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) +{ + int k; + char *p, *r; + + if (!ext2_test_bit(goal, map)) + goto got_it; + +repeat: + if (goal) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + k = (goal + 63) & ~63; + goal = ext2_find_next_zero_bit(map, k, goal); + if (goal < k) + goto got_it; + /* + * Search in the remainder of the current group. + */ + } + + p = map + (goal >> 3); + r = memscan(p, 0, (size - goal + 7) >> 3); + k = (r - map) << 3; + if (k < size) { + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards to find the start of this + * group of free blocks - won't take more than 7 iterations. + */ + for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--) + ; + goto got_it; + } + + k = ext2_find_next_zero_bit ((u32 *)map, size, goal); + if (k < size) { + goal = k; + goto got_it; + } + return -1; +got_it: + if (ext2_set_bit_atomic(lock, goal, (void *) map)) + goto repeat; + return goal; +} + +/* + * ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +int ext2_new_block(struct inode *inode, unsigned long goal, + u32 *prealloc_count, u32 *prealloc_block, int *err) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; /* bh2 */ + struct ext2_group_desc *desc; + int group_no; /* i */ + int ret_block; /* j */ + int group_idx; /* k */ + int target_block; /* tmp */ + int block = 0; + struct super_block *sb = inode->i_sb; + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb); + unsigned prealloc_goal = es->s_prealloc_blocks; + unsigned group_alloc = 0, es_alloc, dq_alloc; + int nr_scanned_groups; + + if (!prealloc_goal--) + prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1; + if (!prealloc_count || *prealloc_count) + prealloc_goal = 0; + + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + *err = -EDQUOT; + goto out; + } + + while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal)) + prealloc_goal--; + + dq_alloc = prealloc_goal + 1; + es_alloc = reserve_blocks(sb, dq_alloc); + if (!es_alloc) { + *err = -ENOSPC; + goto out_dquot; + } + + ext2_debug ("goal=%lu.\n", goal); + + if (goal < le32_to_cpu(es->s_first_data_block) || + goal >= le32_to_cpu(es->s_blocks_count)) + goal = le32_to_cpu(es->s_first_data_block); + group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; + desc = ext2_get_group_desc (sb, group_no, &gdp_bh); + if (!desc) { + /* + * gdp_bh may still be uninitialised. But group_release_blocks + * will not touch it because group_alloc is zero. + */ + goto io_error; + } + + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); + if (group_alloc) { + ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % + group_size); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + + ext2_debug("goal is at %d:%d.\n", group_no, ret_block); + + ret_block = grab_block(sb_bgl_lock(sbi, group_no), + bitmap_bh->b_data, group_size, ret_block); + if (ret_block >= 0) + goto got_block; + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + group_alloc = 0; + } + + ext2_debug ("Bit not found in block group %d.\n", group_no); + + /* + * Now search the rest of the groups. We assume that + * i and desc correctly point to the last group visited. + */ + nr_scanned_groups = 0; +retry: + for (group_idx = 0; !group_alloc && + group_idx < sbi->s_groups_count; group_idx++) { + group_no++; + if (group_no >= sbi->s_groups_count) + group_no = 0; + desc = ext2_get_group_desc(sb, group_no, &gdp_bh); + if (!desc) + goto io_error; + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); + } + if (!group_alloc) { + *err = -ENOSPC; + goto out_release; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + + ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, + group_size, 0); + if (ret_block < 0) { + /* + * If a free block counter is corrupted we can loop inifintely. + * Detect that here. + */ + nr_scanned_groups++; + if (nr_scanned_groups > 2 * sbi->s_groups_count) { + ext2_error(sb, "ext2_new_block", + "corrupted free blocks counters"); + goto io_error; + } + /* + * Someone else grabbed the last free block in this blockgroup + * before us. Retry the scan. + */ + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + group_alloc = 0; + goto retry; + } + +got_block: + ext2_debug("using block group %d(%d)\n", + group_no, desc->bg_free_blocks_count); + + target_block = ret_block + group_no * group_size + + le32_to_cpu(es->s_first_data_block); + + if (target_block == le32_to_cpu(desc->bg_block_bitmap) || + target_block == le32_to_cpu(desc->bg_inode_bitmap) || + in_range(target_block, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group)) + ext2_error (sb, "ext2_new_block", + "Allocating block in system zone - " + "block = %u", target_block); + + if (target_block >= le32_to_cpu(es->s_blocks_count)) { + ext2_error (sb, "ext2_new_block", + "block(%d) >= blocks count(%d) - " + "block_group = %d, es == %p ", ret_block, + le32_to_cpu(es->s_blocks_count), group_no, es); + goto io_error; + } + block = target_block; + + /* OK, we _had_ allocated something */ + ext2_debug("found bit %d\n", ret_block); + + dq_alloc--; + es_alloc--; + group_alloc--; + + /* + * Do block preallocation now if required. + */ + write_lock(&EXT2_I(inode)->i_meta_lock); + if (group_alloc && !*prealloc_count) { + unsigned n; + + for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { + if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), + ret_block, + (void*) bitmap_bh->b_data)) + break; + } + *prealloc_block = block + 1; + *prealloc_count = n; + es_alloc -= n; + dq_alloc -= n; + group_alloc -= n; + } + write_unlock(&EXT2_I(inode)->i_meta_lock); + + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); + + ext2_debug ("allocating block %d. ", block); + + *err = 0; +out_release: + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + release_blocks(sb, es_alloc); +out_dquot: + DQUOT_FREE_BLOCK(inode, dq_alloc); +out: + brelse(bitmap_bh); + return block; + +io_error: + *err = -EIO; + goto out_release; +} + +unsigned long ext2_count_free_blocks (struct super_block * sb) +{ + struct ext2_group_desc * desc; + unsigned long desc_count = 0; + int i; +#ifdef EXT2FS_DEBUG + unsigned long bitmap_count, x; + struct ext2_super_block *es; + + lock_super (sb); + es = EXT2_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + desc = NULL; + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + struct buffer_head *bitmap_bh; + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + bitmap_bh = read_block_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext2_count_free(bitmap_bh, sb->s_blocksize); + printk ("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(desc->bg_free_blocks_count), x); + bitmap_count += x; + brelse(bitmap_bh); + } + printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", + (long)le32_to_cpu(es->s_free_blocks_count), + desc_count, bitmap_count); + unlock_super (sb); + return bitmap_count; +#else + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + } + return desc_count; +#endif +} + +static inline int +block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) +{ + return ext2_test_bit ((block - + le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % + EXT2_BLOCKS_PER_GROUP(sb), map); +} + +static inline int test_root(int a, int b) +{ + int num = b; + + while (a > num) + num *= b; + return num == a; +} + +static int ext2_group_sparse(int group) +{ + if (group <= 1) + return 1; + return (test_root(group, 3) || test_root(group, 5) || + test_root(group, 7)); +} + +/** + * ext2_bg_has_super - number of blocks used by the superblock in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the superblock (primary or backup) + * in this group. Currently this will be only 0 or 1. + */ +int ext2_bg_has_super(struct super_block *sb, int group) +{ + if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& + !ext2_group_sparse(group)) + return 0; + return 1; +} + +/** + * ext2_bg_num_gdb - number of blocks used by the group table in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the group descriptor table + * (primary or backup) in this group. In the future there may be a + * different number of descriptor blocks in each group. + */ +unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) +{ + if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& + !ext2_group_sparse(group)) + return 0; + return EXT2_SB(sb)->s_gdb_count; +} + +#ifdef CONFIG_EXT2_CHECK +/* Called at mount-time, super-block is locked */ +void ext2_check_blocks_bitmap (struct super_block * sb) +{ + struct buffer_head *bitmap_bh = NULL; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x, j; + unsigned long desc_blocks; + struct ext2_group_desc * desc; + int i; + + es = EXT2_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + desc = NULL; + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (!bitmap_bh) + continue; + + if (ext2_bg_has_super(sb, i) && + !ext2_test_bit(0, bitmap_bh->b_data)) + ext2_error(sb, __FUNCTION__, + "Superblock in group %d is marked free", i); + + desc_blocks = ext2_bg_num_gdb(sb, i); + for (j = 0; j < desc_blocks; j++) + if (!ext2_test_bit(j + 1, bitmap_bh->b_data)) + ext2_error(sb, __FUNCTION__, + "Descriptor block #%ld in group " + "%d is marked free", j, i); + + if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap), + sb, bitmap_bh->b_data)) + ext2_error(sb, "ext2_check_blocks_bitmap", + "Block bitmap for group %d is marked free", + i); + + if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap), + sb, bitmap_bh->b_data)) + ext2_error(sb, "ext2_check_blocks_bitmap", + "Inode bitmap for group %d is marked free", + i); + + for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++) + if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j, + sb, bitmap_bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Block #%ld of the inode table in " + "group %d is marked free", j, i); + + x = ext2_count_free(bitmap_bh, sb->s_blocksize); + if (le16_to_cpu(desc->bg_free_blocks_count) != x) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, + le16_to_cpu(desc->bg_free_blocks_count), x); + bitmap_count += x; + } + if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count in super block, " + "stored = %lu, counted = %lu", + (unsigned long)le32_to_cpu(es->s_free_blocks_count), + bitmap_count); + brelse(bitmap_bh); +} +#endif diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c new file mode 100644 index 000000000000..20145b74623f --- /dev/null +++ b/fs/ext2/bitmap.c @@ -0,0 +1,25 @@ +/* + * linux/fs/ext2/bitmap.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/buffer_head.h> + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c new file mode 100644 index 000000000000..5b5f52876b42 --- /dev/null +++ b/fs/ext2/dir.c @@ -0,0 +1,673 @@ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * All code that works with directory layout had been switched to pagecache + * and moved here. AV + */ + +#include "ext2.h" +#include <linux/pagemap.h> +#include <linux/smp_lock.h> + +typedef struct ext2_dir_entry_2 ext2_dirent; + +/* + * ext2 uses block-sized chunks. Arguably, sector-sized ones would be + * more robust, but we have what we have + */ +static inline unsigned ext2_chunk_size(struct inode *inode) +{ + return inode->i_sb->s_blocksize; +} + +static inline void ext2_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} + +/* + * Return the offset into page `page_nr' of the last valid + * byte in that page, plus one. + */ +static unsigned +ext2_last_byte(struct inode *inode, unsigned long page_nr) +{ + unsigned last_byte = inode->i_size; + + last_byte -= page_nr << PAGE_CACHE_SHIFT; + if (last_byte > PAGE_CACHE_SIZE) + last_byte = PAGE_CACHE_SIZE; + return last_byte; +} + +static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +{ + struct inode *dir = page->mapping->host; + int err = 0; + dir->i_version++; + page->mapping->a_ops->commit_write(NULL, page, from, to); + if (IS_DIRSYNC(dir)) + err = write_one_page(page, 1); + else + unlock_page(page); + return err; +} + +static void ext2_check_page(struct page *page) +{ + struct inode *dir = page->mapping->host; + struct super_block *sb = dir->i_sb; + unsigned chunk_size = ext2_chunk_size(dir); + char *kaddr = page_address(page); + u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); + unsigned offs, rec_len; + unsigned limit = PAGE_CACHE_SIZE; + ext2_dirent *p; + char *error; + + if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_CACHE_MASK; + if (limit & (chunk_size - 1)) + goto Ebadsize; + if (!limit) + goto out; + } + for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { + p = (ext2_dirent *)(kaddr + offs); + rec_len = le16_to_cpu(p->rec_len); + + if (rec_len < EXT2_DIR_REC_LEN(1)) + goto Eshort; + if (rec_len & 3) + goto Ealign; + if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; + if (le32_to_cpu(p->inode) > max_inumber) + goto Einumber; + } + if (offs != limit) + goto Eend; +out: + SetPageChecked(page); + return; + + /* Too bad, we had an error */ + +Ebadsize: + ext2_error(sb, "ext2_check_page", + "size of directory #%lu is not a multiple of chunk size", + dir->i_ino + ); + goto fail; +Eshort: + error = "rec_len is smaller than minimal"; + goto bad_entry; +Ealign: + error = "unaligned directory entry"; + goto bad_entry; +Enamelen: + error = "rec_len is too small for name_len"; + goto bad_entry; +Espan: + error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "inode out of bounds"; +bad_entry: + ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode), + rec_len, p->name_len); + goto fail; +Eend: + p = (ext2_dirent *)(kaddr + offs); + ext2_error (sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode)); +fail: + SetPageChecked(page); + SetPageError(page); +} + +static struct page * ext2_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t*)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page_locked(page); + kmap(page); + if (!PageUptodate(page)) + goto fail; + if (!PageChecked(page)) + ext2_check_page(page); + if (PageError(page)) + goto fail; + } + return page; + +fail: + ext2_put_page(page); + return ERR_PTR(-EIO); +} + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + * + * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. + */ +static inline int ext2_match (int len, const char * const name, + struct ext2_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * p is at least 6 bytes before the end of page + */ +static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) +{ + return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); +} + +static inline unsigned +ext2_validate_entry(char *base, unsigned offset, unsigned mask) +{ + ext2_dirent *de = (ext2_dirent*)(base + offset); + ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); + while ((char*)p < (char*)de) { + if (p->rec_len == 0) + break; + p = ext2_next_entry(p); + } + return (char *)p - base; +} + +static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] = DT_UNKNOWN, + [EXT2_FT_REG_FILE] = DT_REG, + [EXT2_FT_DIR] = DT_DIR, + [EXT2_FT_CHRDEV] = DT_CHR, + [EXT2_FT_BLKDEV] = DT_BLK, + [EXT2_FT_FIFO] = DT_FIFO, + [EXT2_FT_SOCK] = DT_SOCK, + [EXT2_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR, + [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK, + [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK, +}; + +static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + else + de->file_type = 0; +} + +static int +ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) +{ + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; + unsigned int offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = dir_pages(inode); + unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); + unsigned char *types = NULL; + int need_revalidate = (filp->f_version != inode->i_version); + int ret; + + if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) + goto success; + + if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + types = ext2_filetype_table; + + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + ext2_dirent *de; + struct page *page = ext2_get_page(inode, n); + + if (IS_ERR(page)) { + ext2_error(sb, __FUNCTION__, + "bad page in #%lu", + inode->i_ino); + filp->f_pos += PAGE_CACHE_SIZE - offset; + ret = -EIO; + goto done; + } + kaddr = page_address(page); + if (need_revalidate) { + offset = ext2_validate_entry(kaddr, offset, chunk_mask); + need_revalidate = 0; + } + de = (ext2_dirent *)(kaddr+offset); + limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); + for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { + if (de->rec_len == 0) { + ext2_error(sb, __FUNCTION__, + "zero-length directory entry"); + ret = -EIO; + ext2_put_page(page); + goto done; + } + if (de->inode) { + int over; + unsigned char d_type = DT_UNKNOWN; + + if (types && de->file_type < EXT2_FT_MAX) + d_type = types[de->file_type]; + + offset = (char *)de - kaddr; + over = filldir(dirent, de->name, de->name_len, + (n<<PAGE_CACHE_SHIFT) | offset, + le32_to_cpu(de->inode), d_type); + if (over) { + ext2_put_page(page); + goto success; + } + } + filp->f_pos += le16_to_cpu(de->rec_len); + } + ext2_put_page(page); + } + +success: + ret = 0; +done: + filp->f_version = inode->i_version; + return ret; +} + +/* + * ext2_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the page in which the entry was found, and the entry itself + * (as a parameter - res_dir). Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, + struct dentry *dentry, struct page ** res_page) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned long start, n; + unsigned long npages = dir_pages(dir); + struct page *page = NULL; + struct ext2_inode_info *ei = EXT2_I(dir); + ext2_dirent * de; + + if (npages == 0) + goto out; + + /* OFFSET_CACHE */ + *res_page = NULL; + + start = ei->i_dir_start_lookup; + if (start >= npages) + start = 0; + n = start; + do { + char *kaddr; + page = ext2_get_page(dir, n); + if (!IS_ERR(page)) { + kaddr = page_address(page); + de = (ext2_dirent *) kaddr; + kaddr += ext2_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { + if (de->rec_len == 0) { + ext2_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + ext2_put_page(page); + goto out; + } + if (ext2_match (namelen, name, de)) + goto found; + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + if (++n >= npages) + n = 0; + } while (n != start); +out: + return NULL; + +found: + *res_page = page; + ei->i_dir_start_lookup = n; + return de; +} + +struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) +{ + struct page *page = ext2_get_page(dir, 0); + ext2_dirent *de = NULL; + + if (!IS_ERR(page)) { + de = ext2_next_entry((ext2_dirent *) page_address(page)); + *p = page; + } + return de; +} + +ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) +{ + ino_t res = 0; + struct ext2_dir_entry_2 * de; + struct page *page; + + de = ext2_find_entry (dir, dentry, &page); + if (de) { + res = le32_to_cpu(de->inode); + kunmap(page); + page_cache_release(page); + } + return res; +} + +/* Releases the page */ +void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct page *page, struct inode *inode) +{ + unsigned from = (char *) de - (char *) page_address(page); + unsigned to = from + le16_to_cpu(de->rec_len); + int err; + + lock_page(page); + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); + ext2_put_page(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); +} + +/* + * Parent is locked. + */ +int ext2_add_link (struct dentry *dentry, struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned chunk_size = ext2_chunk_size(dir); + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned short rec_len, name_len; + struct page *page = NULL; + ext2_dirent * de; + unsigned long npages = dir_pages(dir); + unsigned long n; + char *kaddr; + unsigned from, to; + int err; + + /* + * We take care of directory expansion in the same loop. + * This code plays outside i_size, so it locks the page + * to protect that region. + */ + for (n = 0; n <= npages; n++) { + char *dir_end; + + page = ext2_get_page(dir, n); + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; + lock_page(page); + kaddr = page_address(page); + dir_end = kaddr + ext2_last_byte(dir, n); + de = (ext2_dirent *)kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *)de <= kaddr) { + if ((char *)de == dir_end) { + /* We hit i_size */ + name_len = 0; + rec_len = chunk_size; + de->rec_len = cpu_to_le16(chunk_size); + de->inode = 0; + goto got_it; + } + if (de->rec_len == 0) { + ext2_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out_unlock; + } + err = -EEXIST; + if (ext2_match (namelen, name, de)) + goto out_unlock; + name_len = EXT2_DIR_REC_LEN(de->name_len); + rec_len = le16_to_cpu(de->rec_len); + if (!de->inode && rec_len >= reclen) + goto got_it; + if (rec_len >= name_len + reclen) + goto got_it; + de = (ext2_dirent *) ((char *) de + rec_len); + } + unlock_page(page); + ext2_put_page(page); + } + BUG(); + return -EINVAL; + +got_it: + from = (char*)de - (char*)page_address(page); + to = from + rec_len; + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + goto out_unlock; + if (de->inode) { + ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); + de1->rec_len = cpu_to_le16(rec_len - name_len); + de->rec_len = cpu_to_le16(name_len); + de = de1; + } + de->name_len = namelen; + memcpy (de->name, name, namelen); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + /* OFFSET_CACHE */ +out_put: + ext2_put_page(page); +out: + return err; +out_unlock: + unlock_page(page); + goto out_put; +} + +/* + * ext2_delete_entry deletes a directory entry by merging it with the + * previous entry. Page is up-to-date. Releases the page. + */ +int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *kaddr = page_address(page); + unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); + unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + ext2_dirent * pde = NULL; + ext2_dirent * de = (ext2_dirent *) (kaddr + from); + int err; + + while ((char*)de < (char*)dir) { + if (de->rec_len == 0) { + ext2_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out; + } + pde = de; + de = ext2_next_entry(de); + } + if (pde) + from = (char*)pde - (char*)page_address(page); + lock_page(page); + err = mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + if (pde) + pde->rec_len = cpu_to_le16(to-from); + dir->inode = 0; + err = ext2_commit_chunk(page, from, to); + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(inode); +out: + ext2_put_page(page); + return err; +} + +/* + * Set the first fragment of directory. + */ +int ext2_make_empty(struct inode *inode, struct inode *parent) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page = grab_cache_page(mapping, 0); + unsigned chunk_size = ext2_chunk_size(inode); + struct ext2_dir_entry_2 * de; + int err; + void *kaddr; + + if (!page) + return -ENOMEM; + err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + if (err) { + unlock_page(page); + goto fail; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, chunk_size); + de = (struct ext2_dir_entry_2 *)kaddr; + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); + memcpy (de->name, ".\0\0", 4); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + + de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); + de->name_len = 2; + de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); + de->inode = cpu_to_le32(parent->i_ino); + memcpy (de->name, "..\0", 4); + ext2_set_de_type (de, inode); + kunmap_atomic(kaddr, KM_USER0); + err = ext2_commit_chunk(page, 0, chunk_size); +fail: + page_cache_release(page); + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +int ext2_empty_dir (struct inode * inode) +{ + struct page *page = NULL; + unsigned long i, npages = dir_pages(inode); + + for (i = 0; i < npages; i++) { + char *kaddr; + ext2_dirent * de; + page = ext2_get_page(inode, i); + + if (IS_ERR(page)) + continue; + + kaddr = page_address(page); + de = (ext2_dirent *)kaddr; + kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); + + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + ext2_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry"); + printk("kaddr=%p, de=%p\n", kaddr, de); + goto not_empty; + } + if (de->inode != 0) { + /* check for . and .. */ + if (de->name[0] != '.') + goto not_empty; + if (de->name_len > 2) + goto not_empty; + if (de->name_len < 2) { + if (de->inode != + cpu_to_le32(inode->i_ino)) + goto not_empty; + } else if (de->name[1] != '.') + goto not_empty; + } + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + return 1; + +not_empty: + ext2_put_page(page); + return 0; +} + +struct file_operations ext2_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = ext2_readdir, + .ioctl = ext2_ioctl, + .fsync = ext2_sync_file, +}; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h new file mode 100644 index 000000000000..9f1a40e7945c --- /dev/null +++ b/fs/ext2/ext2.h @@ -0,0 +1,160 @@ +#include <linux/fs.h> +#include <linux/ext2_fs.h> + +/* + * second extended file system inode data in memory + */ +struct ext2_inode_info { + __le32 i_data[15]; + __u32 i_flags; + __u32 i_faddr; + __u8 i_frag_no; + __u8 i_frag_size; + __u16 i_state; + __u32 i_file_acl; + __u32 i_dir_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + __u32 i_block_group; + + /* + * i_next_alloc_block is the logical (file-relative) number of the + * most-recently-allocated block in this file. Yes, it is misnamed. + * We use this for detecting linearly ascending allocation requests. + */ + __u32 i_next_alloc_block; + + /* + * i_next_alloc_goal is the *physical* companion to i_next_alloc_block. + * it the the physical block number of the block which was most-recently + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + __u32 i_next_alloc_goal; + __u32 i_prealloc_block; + __u32 i_prealloc_count; + __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT2_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT2_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + rwlock_t i_meta_lock; + struct inode vfs_inode; +}; + +/* + * Inode dynamic state flags + */ +#define EXT2_STATE_NEW 0x00000001 /* inode is newly created */ + + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext2 source programs needs to include it so they are duplicated here. + */ + +static inline struct ext2_inode_info *EXT2_I(struct inode *inode) +{ + return container_of(inode, struct ext2_inode_info, vfs_inode); +} + +/* balloc.c */ +extern int ext2_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); +extern int ext2_new_block (struct inode *, unsigned long, + __u32 *, __u32 *, int *); +extern void ext2_free_blocks (struct inode *, unsigned long, + unsigned long); +extern unsigned long ext2_count_free_blocks (struct super_block *); +extern unsigned long ext2_count_dirs (struct super_block *); +extern void ext2_check_blocks_bitmap (struct super_block *); +extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); + +/* dir.c */ +extern int ext2_add_link (struct dentry *, struct inode *); +extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); +extern int ext2_make_empty(struct inode *, struct inode *); +extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); +extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); +extern int ext2_empty_dir (struct inode *); +extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); +extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); + +/* fsync.c */ +extern int ext2_sync_file (struct file *, struct dentry *, int); + +/* ialloc.c */ +extern struct inode * ext2_new_inode (struct inode *, int); +extern void ext2_free_inode (struct inode *); +extern unsigned long ext2_count_free_inodes (struct super_block *); +extern void ext2_check_inodes_bitmap (struct super_block *); +extern unsigned long ext2_count_free (struct buffer_head *, unsigned); + +/* inode.c */ +extern void ext2_read_inode (struct inode *); +extern int ext2_write_inode (struct inode *, int); +extern void ext2_delete_inode (struct inode *); +extern int ext2_sync_inode (struct inode *); +extern void ext2_discard_prealloc (struct inode *); +extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern void ext2_truncate (struct inode *); +extern int ext2_setattr (struct dentry *, struct iattr *); +extern void ext2_set_inode_flags(struct inode *inode); + +/* ioctl.c */ +extern int ext2_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); + +/* super.c */ +extern void ext2_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext2_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext2_update_dynamic_rev (struct super_block *sb); +extern void ext2_write_super (struct super_block *); + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern struct file_operations ext2_dir_operations; + +/* file.c */ +extern struct inode_operations ext2_file_inode_operations; +extern struct file_operations ext2_file_operations; + +/* inode.c */ +extern struct address_space_operations ext2_aops; +extern struct address_space_operations ext2_nobh_aops; + +/* namei.c */ +extern struct inode_operations ext2_dir_inode_operations; +extern struct inode_operations ext2_special_inode_operations; + +/* symlink.c */ +extern struct inode_operations ext2_fast_symlink_inode_operations; +extern struct inode_operations ext2_symlink_inode_operations; diff --git a/fs/ext2/file.c b/fs/ext2/file.c new file mode 100644 index 000000000000..f5e86141ec54 --- /dev/null +++ b/fs/ext2/file.c @@ -0,0 +1,68 @@ +/* + * linux/fs/ext2/file.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 fs regular file handling primitives + * + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + */ + +#include <linux/time.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +/* + * Called when an inode is released. Note that this is different + * from ext2_open_file: open gets called at every open, but release + * gets called only when /all/ the files are closed. + */ +static int ext2_release_file (struct inode * inode, struct file * filp) +{ + if (filp->f_mode & FMODE_WRITE) + ext2_discard_prealloc (inode); + return 0; +} + +/* + * We have mostly NULL's here: the current defaults are ok for + * the ext2 filesystem. + */ +struct file_operations ext2_file_operations = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .write = generic_file_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .ioctl = ext2_ioctl, + .mmap = generic_file_mmap, + .open = generic_file_open, + .release = ext2_release_file, + .fsync = ext2_sync_file, + .readv = generic_file_readv, + .writev = generic_file_writev, + .sendfile = generic_file_sendfile, +}; + +struct inode_operations ext2_file_inode_operations = { + .truncate = ext2_truncate, +#ifdef CONFIG_EXT2_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext2_listxattr, + .removexattr = generic_removexattr, +#endif + .setattr = ext2_setattr, + .permission = ext2_permission, +}; diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c new file mode 100644 index 000000000000..c9c2e5ffa48e --- /dev/null +++ b/fs/ext2/fsync.c @@ -0,0 +1,51 @@ +/* + * linux/fs/ext2/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * from + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * from + * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2fs fsync primitive + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Removed unnecessary code duplication for little endian machines + * and excessive __inline__s. + * Andi Kleen, 1997 + * + * Major simplications and cleanup - we only need to do the metadata, because + * we can depend on generic_block_fdatasync() to sync the data blocks. + */ + +#include "ext2.h" +#include <linux/smp_lock.h> +#include <linux/buffer_head.h> /* for fsync_inode_buffers() */ + + +/* + * File may be NULL when we are called. Perhaps we shouldn't + * even pass file to fsync ? + */ + +int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = ext2_sync_inode(inode); + if (ret == 0) + ret = err; + return ret; +} diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c new file mode 100644 index 000000000000..77e059149212 --- /dev/null +++ b/fs/ext2/ialloc.c @@ -0,0 +1,735 @@ +/* + * linux/fs/ext2/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/config.h> +#include <linux/quotaops.h> +#include <linux/sched.h> +#include <linux/backing-dev.h> +#include <linux/buffer_head.h> +#include <linux/random.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +/* + * ialloc.c contains the inodes allocation and deallocation routines + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. + */ + + +/* + * Read the inode allocation bitmap for a given block_group, reading + * into the specified slot in the superblock's bitmap cache. + * + * Return buffer_head of bitmap on success or NULL. + */ +static struct buffer_head * +read_inode_bitmap(struct super_block * sb, unsigned long block_group) +{ + struct ext2_group_desc *desc; + struct buffer_head *bh = NULL; + + desc = ext2_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; + + bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + if (!bh) + ext2_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %u", + block_group, le32_to_cpu(desc->bg_inode_bitmap)); +error_out: + return bh; +} + +static void ext2_release_inode(struct super_block *sb, int group, int dir) +{ + struct ext2_group_desc * desc; + struct buffer_head *bh; + + desc = ext2_get_group_desc(sb, group, &bh); + if (!desc) { + ext2_error(sb, "ext2_release_inode", + "can't get descriptor for group %d", group); + return; + } + + spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); + if (dir) + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); + spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); + if (dir) + percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); + sb->s_dirt = 1; + mark_buffer_dirty(bh); +} + +/* + * NOTE! When we get the inode, we're the only people + * that have access to it, and as such there are no + * race conditions we have to worry about. The inode + * is not on the hash-lists, and it cannot be reached + * through the filesystem because the directory entry + * has been deleted earlier. + * + * HOWEVER: we must make sure that we get no aliases, + * which means that we have to call "clear_inode()" + * _before_ we mark the inode not in use in the inode + * bitmaps. Otherwise a newly created file might use + * the same inode number (not actually the same pointer + * though), and then we'd have two inodes sharing the + * same inode number and space on the harddisk. + */ +void ext2_free_inode (struct inode * inode) +{ + struct super_block * sb = inode->i_sb; + int is_directory; + unsigned long ino; + struct buffer_head *bitmap_bh = NULL; + unsigned long block_group; + unsigned long bit; + struct ext2_super_block * es; + + ino = inode->i_ino; + ext2_debug ("freeing inode %lu\n", ino); + + /* + * Note: we must free any quota before locking the superblock, + * as writing the quota to disk may need the lock as well. + */ + if (!is_bad_inode(inode)) { + /* Quota is already initialized in iput() */ + ext2_xattr_delete_inode(inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + } + + es = EXT2_SB(sb)->s_es; + is_directory = S_ISDIR(inode->i_mode); + + /* Do this BEFORE marking the inode not in use or returning an error */ + clear_inode (inode); + + if (ino < EXT2_FIRST_INO(sb) || + ino > le32_to_cpu(es->s_inodes_count)) { + ext2_error (sb, "ext2_free_inode", + "reserved or nonexistent inode %lu", ino); + goto error_return; + } + block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + + /* Ok, now we can actually update the inode bitmaps.. */ + if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), + bit, (void *) bitmap_bh->b_data)) + ext2_error (sb, "ext2_free_inode", + "bit already cleared for inode %lu", ino); + else + ext2_release_inode(sb, block_group, is_directory); + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); +error_return: + brelse(bitmap_bh); +} + +/* + * We perform asynchronous prereading of the new inode's inode block when + * we create the inode, in the expectation that the inode will be written + * back soon. There are two reasons: + * + * - When creating a large number of files, the async prereads will be + * nicely merged into large reads + * - When writing out a large number of inodes, we don't need to keep on + * stalling the writes while we read the inode block. + * + * FIXME: ext2_get_group_desc() needs to be simplified. + */ +static void ext2_preread_inode(struct inode *inode) +{ + unsigned long block_group; + unsigned long offset; + unsigned long block; + struct buffer_head *bh; + struct ext2_group_desc * gdp; + struct backing_dev_info *bdi; + + bdi = inode->i_mapping->backing_dev_info; + if (bdi_read_congested(bdi)) + return; + if (bdi_write_congested(bdi)) + return; + + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + gdp = ext2_get_group_desc(inode->i_sb, block_group, &bh); + if (gdp == NULL) + return; + + /* + * Figure out the offset within the block group inode table + */ + offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) * + EXT2_INODE_SIZE(inode->i_sb); + block = le32_to_cpu(gdp->bg_inode_table) + + (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); + sb_breadahead(inode->i_sb, block); +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory\'s block + * group to find a free inode. + */ +static int find_group_dir(struct super_block *sb, struct inode *parent) +{ + int ngroups = EXT2_SB(sb)->s_groups_count; + int avefreei = ext2_count_free_inodes(sb) / ngroups; + struct ext2_group_desc *desc, *best_desc = NULL; + struct buffer_head *bh, *best_bh = NULL; + int group, best_group = -1; + + for (group = 0; group < ngroups; group++) { + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (!best_desc || + (le16_to_cpu(desc->bg_free_blocks_count) > + le16_to_cpu(best_desc->bg_free_blocks_count))) { + best_group = group; + best_desc = desc; + best_bh = bh; + } + } + if (!best_desc) + return -1; + + return best_group; +} + +/* + * Orlov's allocator for directories. + * + * We always try to spread first-level directories. + * + * If there are blockgroups with both free inodes and free blocks counts + * not worse than average we return one with smallest directory count. + * Otherwise we simply return a random group. + * + * For the rest rules look so: + * + * It's OK to put directory into a group unless + * it has too many directories already (max_dirs) or + * it has too few free inodes left (min_inodes) or + * it has too few free blocks left (min_blocks) or + * it's already running too large debt (max_debt). + * Parent's group is prefered, if it doesn't satisfy these + * conditions we search cyclically through the rest. If none + * of the groups look good we just look for a group with more + * free inodes than average (starting at parent's group). + * + * Debt is incremented each time we allocate a directory and decremented + * when we allocate an inode, within 0--255. + */ + +#define INODE_COST 64 +#define BLOCK_COST 256 + +static int find_group_orlov(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT2_I(parent)->i_block_group; + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + int ngroups = sbi->s_groups_count; + int inodes_per_group = EXT2_INODES_PER_GROUP(sb); + int freei; + int avefreei; + int free_blocks; + int avefreeb; + int blocks_per_dir; + int ndirs; + int max_debt, max_dirs, min_blocks, min_inodes; + int group = -1, i; + struct ext2_group_desc *desc; + struct buffer_head *bh; + + freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); + avefreei = freei / ngroups; + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + avefreeb = free_blocks / ngroups; + ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); + + if ((parent == sb->s_root->d_inode) || + (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) { + struct ext2_group_desc *best_desc = NULL; + struct buffer_head *best_bh = NULL; + int best_ndir = inodes_per_group; + int best_group = -1; + + get_random_bytes(&group, sizeof(group)); + parent_group = (unsigned)group % ngroups; + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + continue; + best_group = group; + best_ndir = le16_to_cpu(desc->bg_used_dirs_count); + best_desc = desc; + best_bh = bh; + } + if (best_group >= 0) { + desc = best_desc; + bh = best_bh; + group = best_group; + goto found; + } + goto fallback; + } + + if (ndirs == 0) + ndirs = 1; /* percpu_counters are approximate... */ + + blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs; + + max_dirs = ndirs / ngroups + inodes_per_group / 16; + min_inodes = avefreei - inodes_per_group / 4; + min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4; + + max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST); + if (max_debt * INODE_COST > inodes_per_group) + max_debt = inodes_per_group / INODE_COST; + if (max_debt > 255) + max_debt = 255; + if (max_debt == 0) + max_debt = 1; + + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (sbi->s_debts[group] >= max_debt) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + continue; + goto found; + } + +fallback: + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + goto found; + } + + if (avefreei) { + /* + * The free-inodes counter is approximate, and for really small + * filesystems the above test can fail to find any blockgroups + */ + avefreei = 0; + goto fallback; + } + + return -1; + +found: + return group; +} + +static int find_group_other(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT2_I(parent)->i_block_group; + int ngroups = EXT2_SB(sb)->s_groups_count; + struct ext2_group_desc *desc; + struct buffer_head *bh; + int group, i; + + /* + * Try to place the inode in its parent directory + */ + group = parent_group; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + goto found; + + /* + * We're going to place this inode in a different blockgroup from its + * parent. We want to cause files in a common directory to all land in + * the same blockgroup. But we want files which are in a different + * directory which shares a blockgroup with our parent to land in a + * different blockgroup. + * + * So add our directory's i_ino into the starting point for the hash. + */ + group = (group + parent->i_ino) % ngroups; + + /* + * Use a quadratic hash to find a group with a free inode and some + * free blocks. + */ + for (i = 1; i < ngroups; i <<= 1) { + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + goto found; + } + + /* + * That failed: try linear search for a free inode, even if that group + * has no free blocks. + */ + group = parent_group; + for (i = 0; i < ngroups; i++) { + if (++group >= ngroups) + group = 0; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + } + + return -1; + +found: + return group; +} + +struct inode *ext2_new_inode(struct inode *dir, int mode) +{ + struct super_block *sb; + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bh2; + int group, i; + ino_t ino = 0; + struct inode * inode; + struct ext2_group_desc *gdp; + struct ext2_super_block *es; + struct ext2_inode_info *ei; + struct ext2_sb_info *sbi; + int err; + + sb = dir->i_sb; + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ei = EXT2_I(inode); + sbi = EXT2_SB(sb); + es = sbi->s_es; + if (S_ISDIR(mode)) { + if (test_opt(sb, OLDALLOC)) + group = find_group_dir(sb, dir); + else + group = find_group_orlov(sb, dir); + } else + group = find_group_other(sb, dir); + + if (group == -1) { + err = -ENOSPC; + goto fail; + } + + for (i = 0; i < sbi->s_groups_count; i++) { + gdp = ext2_get_group_desc(sb, group, &bh2); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { + err = -EIO; + goto fail; + } + ino = 0; + +repeat_in_this_group: + ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data, + EXT2_INODES_PER_GROUP(sb), ino); + if (ino >= EXT2_INODES_PER_GROUP(sb)) { + /* + * Rare race: find_group_xx() decided that there were + * free inodes in this group, but by the time we tried + * to allocate one, they're all gone. This can also + * occur because the counters which find_group_orlov() + * uses are approximate. So just go and search the + * next block group. + */ + if (++group == sbi->s_groups_count) + group = 0; + continue; + } + if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group), + ino, bitmap_bh->b_data)) { + /* we lost this inode */ + if (++ino >= EXT2_INODES_PER_GROUP(sb)) { + /* this group is exhausted, try next group */ + if (++group == sbi->s_groups_count) + group = 0; + continue; + } + /* try to find free inode in the same group */ + goto repeat_in_this_group; + } + goto got; + } + + /* + * Scanned all blockgroups. + */ + err = -ENOSPC; + goto fail; +got: + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); + brelse(bitmap_bh); + + ino += group * EXT2_INODES_PER_GROUP(sb) + 1; + if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext2_error (sb, "ext2_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%lu", group, + (unsigned long) ino); + err = -EIO; + goto fail; + } + + percpu_counter_mod(&sbi->s_freeinodes_counter, -1); + if (S_ISDIR(mode)) + percpu_counter_inc(&sbi->s_dirs_counter); + + spin_lock(sb_bgl_lock(sbi, group)); + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) { + if (sbi->s_debts[group] < 255) + sbi->s_debts[group]++; + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } else { + if (sbi->s_debts[group]) + sbi->s_debts[group]--; + } + spin_unlock(sb_bgl_lock(sbi, group)); + + sb->s_dirt = 1; + mark_buffer_dirty(bh2); + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + + inode->i_ino = ino; + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + if (S_ISLNK(mode)) + ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); + /* dirsync is only applied to directories */ + if (!S_ISDIR(mode)) + ei->i_flags &= ~EXT2_DIRSYNC_FL; + ei->i_faddr = 0; + ei->i_frag_no = 0; + ei->i_frag_size = 0; + ei->i_file_acl = 0; + ei->i_dir_acl = 0; + ei->i_dtime = 0; + ei->i_block_group = group; + ei->i_next_alloc_block = 0; + ei->i_next_alloc_goal = 0; + ei->i_prealloc_block = 0; + ei->i_prealloc_count = 0; + ei->i_dir_start_lookup = 0; + ei->i_state = EXT2_STATE_NEW; + ext2_set_inode_flags(inode); + spin_lock(&sbi->s_next_gen_lock); + inode->i_generation = sbi->s_next_generation++; + spin_unlock(&sbi->s_next_gen_lock); + insert_inode_hash(inode); + + if (DQUOT_ALLOC_INODE(inode)) { + DQUOT_DROP(inode); + err = -ENOSPC; + goto fail2; + } + err = ext2_init_acl(inode, dir); + if (err) { + DQUOT_FREE_INODE(inode); + goto fail2; + } + mark_inode_dirty(inode); + ext2_debug("allocating inode %lu\n", inode->i_ino); + ext2_preread_inode(inode); + return inode; + +fail2: + inode->i_flags |= S_NOQUOTA; + inode->i_nlink = 0; + iput(inode); + return ERR_PTR(err); + +fail: + make_bad_inode(inode); + iput(inode); + return ERR_PTR(err); +} + +unsigned long ext2_count_free_inodes (struct super_block * sb) +{ + struct ext2_group_desc *desc; + unsigned long desc_count = 0; + int i; + +#ifdef EXT2FS_DEBUG + struct ext2_super_block *es; + unsigned long bitmap_count = 0; + struct buffer_head *bitmap_bh = NULL; + + lock_super (sb); + es = EXT2_SB(sb)->s_es; + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + unsigned x; + + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8); + printk("group %d: stored = %d, counted = %u\n", + i, le16_to_cpu(desc->bg_free_inodes_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), + desc_count, bitmap_count); + unlock_super(sb); + return desc_count; +#else + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + } + return desc_count; +#endif +} + +/* Called at mount-time, super-block is locked */ +unsigned long ext2_count_dirs (struct super_block * sb) +{ + unsigned long count = 0; + int i; + + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + count += le16_to_cpu(gdp->bg_used_dirs_count); + } + return count; +} + +#ifdef CONFIG_EXT2_CHECK +/* Called at mount-time, super-block is locked */ +void ext2_check_inodes_bitmap (struct super_block * sb) +{ + struct ext2_super_block * es = EXT2_SB(sb)->s_es; + unsigned long desc_count = 0, bitmap_count = 0; + struct buffer_head *bitmap_bh = NULL; + int i; + + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + struct ext2_group_desc *desc; + unsigned x; + + desc = ext2_get_group_desc(sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8); + if (le16_to_cpu(desc->bg_free_inodes_count) != x) + ext2_error (sb, "ext2_check_inodes_bitmap", + "Wrong free inodes count in group %d, " + "stored = %d, counted = %lu", i, + le16_to_cpu(desc->bg_free_inodes_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + if (percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter) != + bitmap_count) + ext2_error(sb, "ext2_check_inodes_bitmap", + "Wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + (unsigned long)le32_to_cpu(es->s_free_inodes_count), + bitmap_count); +} +#endif diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c new file mode 100644 index 000000000000..b890be022496 --- /dev/null +++ b/fs/ext2/inode.c @@ -0,0 +1,1276 @@ +/* + * linux/fs/ext2/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@dcs.ed.ac.uk), 1993, 1998 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + * + * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 + */ + +#include <linux/smp_lock.h> +#include <linux/time.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/module.h> +#include <linux/writeback.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include "ext2.h" +#include "acl.h" + +MODULE_AUTHOR("Remy Card and others"); +MODULE_DESCRIPTION("Second Extended Filesystem"); +MODULE_LICENSE("GPL"); + +static int ext2_update_inode(struct inode * inode, int do_sync); + +/* + * Test whether an inode is a fast symlink. + */ +static inline int ext2_inode_is_fast_symlink(struct inode *inode) +{ + int ea_blocks = EXT2_I(inode)->i_file_acl ? + (inode->i_sb->s_blocksize >> 9) : 0; + + return (S_ISLNK(inode->i_mode) && + inode->i_blocks - ea_blocks == 0); +} + +/* + * Called at the last iput() if i_nlink is zero. + */ +void ext2_delete_inode (struct inode * inode) +{ + if (is_bad_inode(inode)) + goto no_delete; + EXT2_I(inode)->i_dtime = get_seconds(); + mark_inode_dirty(inode); + ext2_update_inode(inode, inode_needs_sync(inode)); + + inode->i_size = 0; + if (inode->i_blocks) + ext2_truncate (inode); + ext2_free_inode (inode); + + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... */ +} + +void ext2_discard_prealloc (struct inode * inode) +{ +#ifdef EXT2_PREALLOCATE + struct ext2_inode_info *ei = EXT2_I(inode); + write_lock(&ei->i_meta_lock); + if (ei->i_prealloc_count) { + unsigned short total = ei->i_prealloc_count; + unsigned long block = ei->i_prealloc_block; + ei->i_prealloc_count = 0; + ei->i_prealloc_block = 0; + write_unlock(&ei->i_meta_lock); + ext2_free_blocks (inode, block, total); + return; + } else + write_unlock(&ei->i_meta_lock); +#endif +} + +static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) +{ +#ifdef EXT2FS_DEBUG + static unsigned long alloc_hits, alloc_attempts; +#endif + unsigned long result; + + +#ifdef EXT2_PREALLOCATE + struct ext2_inode_info *ei = EXT2_I(inode); + write_lock(&ei->i_meta_lock); + if (ei->i_prealloc_count && + (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block)) + { + result = ei->i_prealloc_block++; + ei->i_prealloc_count--; + write_unlock(&ei->i_meta_lock); + ext2_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); + } else { + write_unlock(&ei->i_meta_lock); + ext2_discard_prealloc (inode); + ext2_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); + if (S_ISREG(inode->i_mode)) + result = ext2_new_block (inode, goal, + &ei->i_prealloc_count, + &ei->i_prealloc_block, err); + else + result = ext2_new_block(inode, goal, NULL, NULL, err); + } +#else + result = ext2_new_block (inode, goal, 0, 0, err); +#endif + return result; +} + +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +static inline int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + +/** + * ext2_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * To store the locations of file's data ext2 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext2_block_to_path(struct inode *inode, + long i_block, int offsets[4], int *boundary) +{ + int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT2_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < 0) { + ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); + } else if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ( (i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT2_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT2_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT2_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); + } + if (boundary) + *boundary = (i_block & (ptrs - 1)) == (final - 1); + return n; +} + +/** + * ext2_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples <key, p, bh> and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + */ +static Indirect *ext2_get_branch(struct inode *inode, + int depth, + int *offsets, + Indirect chain[4], + int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_bread(sb, le32_to_cpu(p->key)); + if (!bh) + goto failure; + read_lock(&EXT2_I(inode)->i_meta_lock); + if (!verify_chain(chain, p)) + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + read_unlock(&EXT2_I(inode)->i_meta_lock); + if (!p->key) + goto no_block; + } + return NULL; + +changed: + read_unlock(&EXT2_I(inode)->i_meta_lock); + brelse(bh); + *err = -EAGAIN; + goto no_block; +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext2_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the prefered place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ + +static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; + __le32 *p; + unsigned long bg_start; + unsigned long colour; + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) + if (*p) + return le32_to_cpu(*p); + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be refered from inode itself? OK, just put it into + * the same cylinder group then. + */ + bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour; +} + +/** + * ext2_find_goal - find a prefered place for allocation. + * @inode: owner + * @block: block we want + * @chain: chain of indirect blocks + * @partial: pointer to the last triple within a chain + * @goal: place to store the result. + * + * Normally this function find the prefered place for block allocation, + * stores it in *@goal and returns zero. If the branch had been changed + * under us we return -EAGAIN. + */ + +static inline int ext2_find_goal(struct inode *inode, + long block, + Indirect chain[4], + Indirect *partial, + unsigned long *goal) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + write_lock(&ei->i_meta_lock); + if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) { + ei->i_next_alloc_block++; + ei->i_next_alloc_goal++; + } + if (verify_chain(chain, partial)) { + /* + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. + */ + if (block == ei->i_next_alloc_block) + *goal = ei->i_next_alloc_goal; + if (!*goal) + *goal = ext2_find_near(inode, partial); + write_unlock(&ei->i_meta_lock); + return 0; + } + write_unlock(&ei->i_meta_lock); + return -EAGAIN; +} + +/** + * ext2_alloc_branch - allocate and set up a chain of blocks. + * @inode: owner + * @num: depth of the chain (number of blocks to allocate) + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates @num blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext2_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext2_get_block(), excpet that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ + +static int ext2_alloc_branch(struct inode *inode, + int num, + unsigned long goal, + int *offsets, + Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int n = 0; + int err; + int i; + int parent = ext2_alloc_block(inode, goal, &err); + + branch[0].key = cpu_to_le32(parent); + if (parent) for (n = 1; n < num; n++) { + struct buffer_head *bh; + /* Allocate the next block */ + int nr = ext2_alloc_block(inode, parent, &err); + if (!nr) + break; + branch[n].key = cpu_to_le32(nr); + /* + * Get buffer_head for parent block, zero it out and set + * the pointer to new one, then send parent to disk. + */ + bh = sb_getblk(inode->i_sb, parent); + lock_buffer(bh); + memset(bh->b_data, 0, blocksize); + branch[n].bh = bh; + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + set_buffer_uptodate(bh); + unlock_buffer(bh); + mark_buffer_dirty_inode(bh, inode); + /* We used to sync bh here if IS_SYNC(inode). + * But we now rely upon generic_osync_inode() + * and b_inode_buffers. But not for directories. + */ + if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) + sync_dirty_buffer(bh); + parent = nr; + } + if (n == num) + return 0; + + /* Allocation failed, free what we already allocated */ + for (i = 1; i < n; i++) + bforget(branch[i].bh); + for (i = 0; i < n; i++) + ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); + return err; +} + +/** + * ext2_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext2_alloc_branch) + * @where: location of missing link + * @num: number of blocks we are adding + * + * This function verifies that chain (up to the missing link) had not + * changed, fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. Otherwise (== chain had been changed) + * we free the new blocks (forgetting their buffer_heads, indeed) and + * return -EAGAIN. + */ + +static inline int ext2_splice_branch(struct inode *inode, + long block, + Indirect chain[4], + Indirect *where, + int num) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + int i; + + /* Verify that place we are splicing to is still there and vacant */ + + write_lock(&ei->i_meta_lock); + if (!verify_chain(chain, where-1) || *where->p) + goto changed; + + /* That's it */ + + *where->p = where->key; + ei->i_next_alloc_block = block; + ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); + + write_unlock(&ei->i_meta_lock); + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME_SEC; + + /* had we spliced it onto indirect block? */ + if (where->bh) + mark_buffer_dirty_inode(where->bh, inode); + + mark_inode_dirty(inode); + return 0; + +changed: + write_unlock(&ei->i_meta_lock); + for (i = 1; i < num; i++) + bforget(where[i].bh); + for (i = 0; i < num; i++) + ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); + return -EAGAIN; +} + +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + */ + +int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) +{ + int err = -EIO; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + unsigned long goal; + int left; + int boundary = 0; + int depth = ext2_block_to_path(inode, iblock, offsets, &boundary); + + if (depth == 0) + goto out; + +reread: + partial = ext2_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { +got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (boundary) + set_buffer_boundary(bh_result); + /* Clean up and exit */ + partial = chain+depth-1; /* the whole chain */ + goto cleanup; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) { +cleanup: + while (partial > chain) { + brelse(partial->bh); + partial--; + } +out: + return err; + } + + /* + * Indirect block might be removed by truncate while we were + * reading it. Handling of that case (forget what we've got and + * reread) is taken out of the main path. + */ + if (err == -EAGAIN) + goto changed; + + goal = 0; + if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) + goto changed; + + left = (chain + depth) - partial; + err = ext2_alloc_branch(inode, left, goal, + offsets+(partial-chain), partial); + if (err) + goto cleanup; + + if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) + goto changed; + + set_buffer_new(bh_result); + goto got_it; + +changed: + while (partial > chain) { + brelse(partial->bh); + partial--; + } + goto reread; +} + +static int ext2_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, ext2_get_block, wbc); +} + +static int ext2_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, ext2_get_block); +} + +static int +ext2_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); +} + +static int +ext2_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page,from,to,ext2_get_block); +} + +static int +ext2_nobh_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return nobh_prepare_write(page,from,to,ext2_get_block); +} + +static int ext2_nobh_writepage(struct page *page, + struct writeback_control *wbc) +{ + return nobh_writepage(page, ext2_get_block, wbc); +} + +static sector_t ext2_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping,block,ext2_get_block); +} + +static int +ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int ret; + + ret = ext2_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + +static ssize_t +ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, ext2_get_blocks, NULL); +} + +static int +ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, ext2_get_block); +} + +struct address_space_operations ext2_aops = { + .readpage = ext2_readpage, + .readpages = ext2_readpages, + .writepage = ext2_writepage, + .sync_page = block_sync_page, + .prepare_write = ext2_prepare_write, + .commit_write = generic_commit_write, + .bmap = ext2_bmap, + .direct_IO = ext2_direct_IO, + .writepages = ext2_writepages, +}; + +struct address_space_operations ext2_nobh_aops = { + .readpage = ext2_readpage, + .readpages = ext2_readpages, + .writepage = ext2_nobh_writepage, + .sync_page = block_sync_page, + .prepare_write = ext2_nobh_prepare_write, + .commit_write = nobh_commit_write, + .bmap = ext2_bmap, + .direct_IO = ext2_direct_IO, + .writepages = ext2_writepages, +}; + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext2_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext2_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext2_truncate(). + * + * When we do truncate() we may have to clean the ends of several indirect + * blocks but leave the blocks themselves alive. Block is partially + * truncated if some data below the new i_size is refered from it (and + * it is on the path to the first completely truncated data block, indeed). + * We have to free the top of that path along with everything to the right + * of the path. Since no allocation past the truncation point is possible + * until ext2_truncate() finishes, we may safely do the latter, but top + * of branch may require special attention - pageout below the truncation + * point might try to populate it. + * + * We atomically detach the top of branch from the tree, store the block + * number of its root in *@top, pointers to buffer_heads of partially + * truncated blocks - in @chain[].bh and pointers to their last elements + * that should not be removed - in @chain[].p. Return value is the pointer + * to last filled element of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0].p + * (no partially truncated stuff there). + */ + +static Indirect *ext2_find_shared(struct inode *inode, + int depth, + int offsets[4], + Indirect chain[4], + __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext2_get_branch(inode, k, offsets, chain, &err); + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + write_lock(&EXT2_I(inode)->i_meta_lock); + if (!partial->key && *partial->p) { + write_unlock(&EXT2_I(inode)->i_meta_lock); + goto no_top; + } + for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + *p->p = 0; + } + write_unlock(&EXT2_I(inode)->i_meta_lock); + + while(partial > p) + { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/** + * ext2_free_data - free a list of data blocks + * @inode: inode we are dealing with + * @p: array of block numbers + * @q: points immediately past the end of array + * + * We are freeing all blocks refered from that array (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) +{ + unsigned long block_to_free = 0, count = 0; + unsigned long nr; + + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); + if (nr) { + *p = 0; + /* accumulate blocks to free if they're contiguous */ + if (count == 0) + goto free_this; + else if (block_to_free == nr - count) + count++; + else { + mark_inode_dirty(inode); + ext2_free_blocks (inode, block_to_free, count); + free_this: + block_to_free = nr; + count = 1; + } + } + } + if (count > 0) { + mark_inode_dirty(inode); + ext2_free_blocks (inode, block_to_free, count); + } +} + +/** + * ext2_free_branches - free an array of branches + * @inode: inode we are dealing with + * @p: array of block numbers + * @q: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks refered from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) +{ + struct buffer_head * bh; + unsigned long nr; + + if (depth--) { + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); + if (!nr) + continue; + *p = 0; + bh = sb_bread(inode->i_sb, nr); + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + ext2_error(inode->i_sb, "ext2_free_branches", + "Read failure, inode=%ld, block=%ld", + inode->i_ino, nr); + continue; + } + ext2_free_branches(inode, + (__le32*)bh->b_data, + (__le32*)bh->b_data + addr_per_block, + depth); + bforget(bh); + ext2_free_blocks(inode, nr, 1); + mark_inode_dirty(inode); + } + } else + ext2_free_data(inode, p, q); +} + +void ext2_truncate (struct inode * inode) +{ + __le32 *i_data = EXT2_I(inode)->i_data; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n; + long iblock; + unsigned blocksize; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (ext2_inode_is_fast_symlink(inode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + ext2_discard_prealloc(inode); + + blocksize = inode->i_sb->s_blocksize; + iblock = (inode->i_size + blocksize-1) + >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); + + if (test_opt(inode->i_sb, NOBH)) + nobh_truncate_page(inode->i_mapping, inode->i_size); + else + block_truncate_page(inode->i_mapping, + inode->i_size, ext2_get_block); + + n = ext2_block_to_path(inode, iblock, offsets, NULL); + if (n == 0) + return; + + if (n == 1) { + ext2_free_data(inode, i_data+offsets[0], + i_data + EXT2_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext2_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (already detached) */ + if (nr) { + if (partial == chain) + mark_inode_dirty(inode); + else + mark_buffer_dirty_inode(partial->bh, inode); + ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext2_free_branches(inode, + partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + mark_buffer_dirty_inode(partial->bh, inode); + brelse (partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT2_IND_BLOCK]; + if (nr) { + i_data[EXT2_IND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, &nr, &nr+1, 1); + } + case EXT2_IND_BLOCK: + nr = i_data[EXT2_DIND_BLOCK]; + if (nr) { + i_data[EXT2_DIND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, &nr, &nr+1, 2); + } + case EXT2_DIND_BLOCK: + nr = i_data[EXT2_TIND_BLOCK]; + if (nr) { + i_data[EXT2_TIND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, &nr, &nr+1, 3); + } + case EXT2_TIND_BLOCK: + ; + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + if (inode_needs_sync(inode)) { + sync_mapping_buffers(inode->i_mapping); + ext2_sync_inode (inode); + } else { + mark_inode_dirty(inode); + } +} + +static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, + struct buffer_head **p) +{ + struct buffer_head * bh; + unsigned long block_group; + unsigned long block; + unsigned long offset; + struct ext2_group_desc * gdp; + + *p = NULL; + if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) || + ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) + goto Einval; + + block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); + gdp = ext2_get_group_desc(sb, block_group, &bh); + if (!gdp) + goto Egdp; + /* + * Figure out the offset within the block group inode table + */ + offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb); + block = le32_to_cpu(gdp->bg_inode_table) + + (offset >> EXT2_BLOCK_SIZE_BITS(sb)); + if (!(bh = sb_bread(sb, block))) + goto Eio; + + *p = bh; + offset &= (EXT2_BLOCK_SIZE(sb) - 1); + return (struct ext2_inode *) (bh->b_data + offset); + +Einval: + ext2_error(sb, "ext2_get_inode", "bad inode number: %lu", + (unsigned long) ino); + return ERR_PTR(-EINVAL); +Eio: + ext2_error(sb, "ext2_get_inode", + "unable to read inode block - inode=%lu, block=%lu", + (unsigned long) ino, block); +Egdp: + return ERR_PTR(-EIO); +} + +void ext2_set_inode_flags(struct inode *inode) +{ + unsigned int flags = EXT2_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + if (flags & EXT2_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT2_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +void ext2_read_inode (struct inode * inode) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + ino_t ino = inode->i_ino; + struct buffer_head * bh; + struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + int n; + +#ifdef CONFIG_EXT2_FS_POSIX_ACL + ei->i_acl = EXT2_ACL_NOT_CACHED; + ei->i_default_acl = EXT2_ACL_NOT_CACHED; +#endif + if (IS_ERR(raw_inode)) + goto bad_inode; + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if (!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; + ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); + /* We now have enough fields to check if the inode was active or not. + * This is needed because nfsd might try to access dead inodes + * the test is that same one that e2fsck uses + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { + /* this inode is deleted */ + brelse (bh); + goto bad_inode; + } + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); + ei->i_flags = le32_to_cpu(raw_inode->i_flags); + ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); + ei->i_frag_no = raw_inode->i_frag; + ei->i_frag_size = raw_inode->i_fsize; + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + ei->i_dir_acl = 0; + if (S_ISREG(inode->i_mode)) + inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; + else + ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + ei->i_dtime = 0; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); + ei->i_state = 0; + ei->i_next_alloc_block = 0; + ei->i_next_alloc_goal = 0; + ei->i_prealloc_count = 0; + ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + ei->i_dir_start_lookup = 0; + + /* + * NOTE! The in-memory inode i_data array is in little-endian order + * even on big-endian machines: we do NOT byteswap the block numbers! + */ + for (n = 0; n < EXT2_N_BLOCKS; n++) + ei->i_data[n] = raw_inode->i_block[n]; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext2_dir_inode_operations; + inode->i_fop = &ext2_dir_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + } else if (S_ISLNK(inode->i_mode)) { + if (ext2_inode_is_fast_symlink(inode)) + inode->i_op = &ext2_fast_symlink_inode_operations; + else { + inode->i_op = &ext2_symlink_inode_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + } + } else { + inode->i_op = &ext2_special_inode_operations; + if (raw_inode->i_block[0]) + init_special_inode(inode, inode->i_mode, + old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); + else + init_special_inode(inode, inode->i_mode, + new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); + } + brelse (bh); + ext2_set_inode_flags(inode); + return; + +bad_inode: + make_bad_inode(inode); + return; +} + +static int ext2_update_inode(struct inode * inode, int do_sync) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; + uid_t uid = inode->i_uid; + gid_t gid = inode->i_gid; + struct buffer_head * bh; + struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); + int n; + int err = 0; + + if (IS_ERR(raw_inode)) + return -EIO; + + /* For fields not not tracking in the in-memory inode, + * initialise them to zero for new inodes. */ + if (ei->i_state & EXT2_STATE_NEW) + memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if (!(test_opt(sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); +/* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if (!ei->i_dtime) { + raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid)); + raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid)); + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(inode->i_size); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); + raw_inode->i_flags = cpu_to_le32(ei->i_flags); + raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); + raw_inode->i_frag = ei->i_frag_no; + raw_inode->i_fsize = ei->i_frag_size; + raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); + if (!S_ISREG(inode->i_mode)) + raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); + else { + raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); + if (inode->i_size > 0x7fffffffULL) { + if (!EXT2_HAS_RO_COMPAT_FEATURE(sb, + EXT2_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT2_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT2_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. + */ + lock_kernel(); + ext2_update_dynamic_rev(sb); + EXT2_SET_RO_COMPAT_FEATURE(sb, + EXT2_FEATURE_RO_COMPAT_LARGE_FILE); + unlock_kernel(); + ext2_write_super(sb); + } + } + } + + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + raw_inode->i_block[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + raw_inode->i_block[1] = 0; + } else { + raw_inode->i_block[0] = 0; + raw_inode->i_block[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + raw_inode->i_block[2] = 0; + } + } else for (n = 0; n < EXT2_N_BLOCKS; n++) + raw_inode->i_block[n] = ei->i_data[n]; + mark_buffer_dirty(bh); + if (do_sync) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk ("IO error syncing ext2 inode [%s:%08lx]\n", + sb->s_id, (unsigned long) ino); + err = -EIO; + } + } + ei->i_state &= ~EXT2_STATE_NEW; + brelse (bh); + return err; +} + +int ext2_write_inode(struct inode *inode, int wait) +{ + return ext2_update_inode(inode, wait); +} + +int ext2_sync_inode(struct inode *inode) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* sys_fsync did this */ + }; + return sync_inode(inode, &wbc); +} + +int ext2_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, iattr); + if (error) + return error; + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; + if (error) + return error; + } + error = inode_setattr(inode, iattr); + if (!error && (iattr->ia_valid & ATTR_MODE)) + error = ext2_acl_chmod(inode); + return error; +} diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c new file mode 100644 index 000000000000..709d8676b962 --- /dev/null +++ b/fs/ext2/ioctl.c @@ -0,0 +1,81 @@ +/* + * linux/fs/ext2/ioctl.c + * + * Copyright (C) 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include "ext2.h" +#include <linux/time.h> +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/uaccess.h> + + +int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + unsigned int flags; + + ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { + case EXT2_IOC_GETFLAGS: + flags = ei->i_flags & EXT2_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case EXT2_IOC_SETFLAGS: { + unsigned int oldflags; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~EXT2_DIRSYNC_FL; + + oldflags = ei->i_flags; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + } + + flags = flags & EXT2_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; + ei->i_flags = flags; + + ext2_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + } + case EXT2_IOC_GETVERSION: + return put_user(inode->i_generation, (int __user *) arg); + case EXT2_IOC_SETVERSION: + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (get_user(inode->i_generation, (int __user *) arg)) + return -EFAULT; + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + default: + return -ENOTTY; + } +} diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c new file mode 100644 index 000000000000..3176b3d3ffa8 --- /dev/null +++ b/fs/ext2/namei.c @@ -0,0 +1,418 @@ +/* + * linux/fs/ext2/namei.c + * + * Rewrite to pagecache. Almost all code had been changed, so blame me + * if the things go wrong. Please, send bug reports to + * viro@parcelfarce.linux.theplanet.co.uk + * + * Stuff here is basically a glue between the VFS and generic UNIXish + * filesystem that keeps everything in pagecache. All knowledge of the + * directory layout is in fs/ext2/dir.c - it turned out to be easily separatable + * and it's easier to debug that way. In principle we might want to + * generalize that a bit and turn it into a library. Or not. + * + * The only non-static object here is ext2_dir_inode_operations. + * + * TODO: get rid of kmap() use, add readahead. + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/pagemap.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +/* + * Couple of helper functions - make the code slightly cleaner. + */ + +static inline void ext2_inc_count(struct inode *inode) +{ + inode->i_nlink++; + mark_inode_dirty(inode); +} + +static inline void ext2_dec_count(struct inode *inode) +{ + inode->i_nlink--; + mark_inode_dirty(inode); +} + +static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) +{ + int err = ext2_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + ext2_dec_count(inode); + iput(inode); + return err; +} + +/* + * Methods themselves. + */ + +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode * inode; + ino_t ino; + + if (dentry->d_name.len > EXT2_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ino = ext2_inode_by_name(dir, dentry); + inode = NULL; + if (ino) { + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + } + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + return NULL; +} + +struct dentry *ext2_get_parent(struct dentry *child) +{ + unsigned long ino; + struct dentry *parent; + struct inode *inode; + struct dentry dotdot; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + + ino = ext2_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + inode = iget(child->d_inode->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + return parent; +} + +/* + * By the time this is called, we already have created + * the directory cache entry for the new file, but it + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) +{ + struct inode * inode = ext2_new_inode (dir, mode); + int err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + mark_inode_dirty(inode); + err = ext2_add_nondir(dentry, inode); + } + return err; +} + +static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) +{ + struct inode * inode; + int err; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + inode = ext2_new_inode (dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +#ifdef CONFIG_EXT2_FS_XATTR + inode->i_op = &ext2_special_inode_operations; +#endif + mark_inode_dirty(inode); + err = ext2_add_nondir(dentry, inode); + } + return err; +} + +static int ext2_symlink (struct inode * dir, struct dentry * dentry, + const char * symname) +{ + struct super_block * sb = dir->i_sb; + int err = -ENAMETOOLONG; + unsigned l = strlen(symname)+1; + struct inode * inode; + + if (l > sb->s_blocksize) + goto out; + + inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out; + + if (l > sizeof (EXT2_I(inode)->i_data)) { + /* slow symlink */ + inode->i_op = &ext2_symlink_inode_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + err = page_symlink(inode, symname, l); + if (err) + goto out_fail; + } else { + /* fast symlink */ + inode->i_op = &ext2_fast_symlink_inode_operations; + memcpy((char*)(EXT2_I(inode)->i_data),symname,l); + inode->i_size = l-1; + } + mark_inode_dirty(inode); + + err = ext2_add_nondir(dentry, inode); +out: + return err; + +out_fail: + ext2_dec_count(inode); + iput (inode); + goto out; +} + +static int ext2_link (struct dentry * old_dentry, struct inode * dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (inode->i_nlink >= EXT2_LINK_MAX) + return -EMLINK; + + inode->i_ctime = CURRENT_TIME_SEC; + ext2_inc_count(inode); + atomic_inc(&inode->i_count); + + return ext2_add_nondir(dentry, inode); +} + +static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + struct inode * inode; + int err = -EMLINK; + + if (dir->i_nlink >= EXT2_LINK_MAX) + goto out; + + ext2_inc_count(dir); + + inode = ext2_new_inode (dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_dir; + + inode->i_op = &ext2_dir_inode_operations; + inode->i_fop = &ext2_dir_operations; + if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; + + ext2_inc_count(inode); + + err = ext2_make_empty(inode, dir); + if (err) + goto out_fail; + + err = ext2_add_link(dentry, inode); + if (err) + goto out_fail; + + d_instantiate(dentry, inode); +out: + return err; + +out_fail: + ext2_dec_count(inode); + ext2_dec_count(inode); + iput(inode); +out_dir: + ext2_dec_count(dir); + goto out; +} + +static int ext2_unlink(struct inode * dir, struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + struct ext2_dir_entry_2 * de; + struct page * page; + int err = -ENOENT; + + de = ext2_find_entry (dir, dentry, &page); + if (!de) + goto out; + + err = ext2_delete_entry (de, page); + if (err) + goto out; + + inode->i_ctime = dir->i_ctime; + ext2_dec_count(inode); + err = 0; +out: + return err; +} + +static int ext2_rmdir (struct inode * dir, struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + int err = -ENOTEMPTY; + + if (ext2_empty_dir(inode)) { + err = ext2_unlink(dir, dentry); + if (!err) { + inode->i_size = 0; + ext2_dec_count(inode); + ext2_dec_count(dir); + } + } + return err; +} + +static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, + struct inode * new_dir, struct dentry * new_dentry ) +{ + struct inode * old_inode = old_dentry->d_inode; + struct inode * new_inode = new_dentry->d_inode; + struct page * dir_page = NULL; + struct ext2_dir_entry_2 * dir_de = NULL; + struct page * old_page; + struct ext2_dir_entry_2 * old_de; + int err = -ENOENT; + + old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + if (!old_de) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + dir_de = ext2_dotdot(old_inode, &dir_page); + if (!dir_de) + goto out_old; + } + + if (new_inode) { + struct page *new_page; + struct ext2_dir_entry_2 *new_de; + + err = -ENOTEMPTY; + if (dir_de && !ext2_empty_dir (new_inode)) + goto out_dir; + + err = -ENOENT; + new_de = ext2_find_entry (new_dir, new_dentry, &new_page); + if (!new_de) + goto out_dir; + ext2_inc_count(old_inode); + ext2_set_link(new_dir, new_de, new_page, old_inode); + new_inode->i_ctime = CURRENT_TIME_SEC; + if (dir_de) + new_inode->i_nlink--; + ext2_dec_count(new_inode); + } else { + if (dir_de) { + err = -EMLINK; + if (new_dir->i_nlink >= EXT2_LINK_MAX) + goto out_dir; + } + ext2_inc_count(old_inode); + err = ext2_add_link(new_dentry, old_inode); + if (err) { + ext2_dec_count(old_inode); + goto out_dir; + } + if (dir_de) + ext2_inc_count(new_dir); + } + + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + * ext2_dec_count() will mark the inode dirty. + */ + old_inode->i_ctime = CURRENT_TIME_SEC; + + ext2_delete_entry (old_de, old_page); + ext2_dec_count(old_inode); + + if (dir_de) { + ext2_set_link(old_inode, dir_de, dir_page, new_dir); + ext2_dec_count(old_dir); + } + return 0; + + +out_dir: + if (dir_de) { + kunmap(dir_page); + page_cache_release(dir_page); + } +out_old: + kunmap(old_page); + page_cache_release(old_page); +out: + return err; +} + +struct inode_operations ext2_dir_inode_operations = { + .create = ext2_create, + .lookup = ext2_lookup, + .link = ext2_link, + .unlink = ext2_unlink, + .symlink = ext2_symlink, + .mkdir = ext2_mkdir, + .rmdir = ext2_rmdir, + .mknod = ext2_mknod, + .rename = ext2_rename, +#ifdef CONFIG_EXT2_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext2_listxattr, + .removexattr = generic_removexattr, +#endif + .setattr = ext2_setattr, + .permission = ext2_permission, +}; + +struct inode_operations ext2_special_inode_operations = { +#ifdef CONFIG_EXT2_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext2_listxattr, + .removexattr = generic_removexattr, +#endif + .setattr = ext2_setattr, + .permission = ext2_permission, +}; diff --git a/fs/ext2/super.c b/fs/ext2/super.c new file mode 100644 index 000000000000..37ca77a157ba --- /dev/null +++ b/fs/ext2/super.c @@ -0,0 +1,1161 @@ +/* + * linux/fs/ext2/super.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/parser.h> +#include <linux/random.h> +#include <linux/buffer_head.h> +#include <linux/smp_lock.h> +#include <linux/vfs.h> +#include <asm/uaccess.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +static void ext2_sync_super(struct super_block *sb, + struct ext2_super_block *es); +static int ext2_remount (struct super_block * sb, int * flags, char * data); +static int ext2_statfs (struct super_block * sb, struct kstatfs * buf); + +void ext2_error (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + + if (!(sb->s_flags & MS_RDONLY)) { + sbi->s_mount_state |= EXT2_ERROR_FS; + es->s_state = + cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS); + ext2_sync_super(sb, es); + } + + va_start(args, fmt); + printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + if (test_opt(sb, ERRORS_PANIC)) + panic("EXT2-fs panic from previous error\n"); + if (test_opt(sb, ERRORS_RO)) { + printk("Remounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + } +} + +void ext2_warning (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", + sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + +void ext2_update_dynamic_rev(struct super_block *sb) +{ + struct ext2_super_block *es = EXT2_SB(sb)->s_es; + + if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) + return; + + ext2_warning(sb, __FUNCTION__, + "updating to rev %d because of new feature flag, " + "running e2fsck is recommended", + EXT2_DYNAMIC_REV); + + es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); + es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE); + es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV); + /* leave es->s_feature_*compat flags alone */ + /* es->s_uuid will be set by e2fsck if empty */ + + /* + * The rest of the superblock fields should be zero, and if not it + * means they are likely already in use, so leave them alone. We + * can leave it up to e2fsck to clean up any inconsistencies there. + */ +} + +static void ext2_put_super (struct super_block * sb) +{ + int db_count; + int i; + struct ext2_sb_info *sbi = EXT2_SB(sb); + + ext2_xattr_put_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { + struct ext2_super_block *es = sbi->s_es; + + es->s_state = cpu_to_le16(sbi->s_mount_state); + ext2_sync_super(sb, es); + } + db_count = sbi->s_gdb_count; + for (i = 0; i < db_count; i++) + if (sbi->s_group_desc[i]) + brelse (sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); + kfree(sbi->s_debts); + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); + brelse (sbi->s_sbh); + sb->s_fs_info = NULL; + kfree(sbi); + + return; +} + +static kmem_cache_t * ext2_inode_cachep; + +static struct inode *ext2_alloc_inode(struct super_block *sb) +{ + struct ext2_inode_info *ei; + ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); + if (!ei) + return NULL; +#ifdef CONFIG_EXT2_FS_POSIX_ACL + ei->i_acl = EXT2_ACL_NOT_CACHED; + ei->i_default_acl = EXT2_ACL_NOT_CACHED; +#endif + ei->vfs_inode.i_version = 1; + return &ei->vfs_inode; +} + +static void ext2_destroy_inode(struct inode *inode) +{ + kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + rwlock_init(&ei->i_meta_lock); +#ifdef CONFIG_EXT2_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", + sizeof(struct ext2_inode_info), + 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (ext2_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + if (kmem_cache_destroy(ext2_inode_cachep)) + printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n"); +} + +static void ext2_clear_inode(struct inode *inode) +{ +#ifdef CONFIG_EXT2_FS_POSIX_ACL + struct ext2_inode_info *ei = EXT2_I(inode); + + if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) { + posix_acl_release(ei->i_acl); + ei->i_acl = EXT2_ACL_NOT_CACHED; + } + if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) { + posix_acl_release(ei->i_default_acl); + ei->i_default_acl = EXT2_ACL_NOT_CACHED; + } +#endif + if (!is_bad_inode(inode)) + ext2_discard_prealloc(inode); +} + + +#ifdef CONFIG_QUOTA +static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); +static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +#endif + +static struct super_operations ext2_sops = { + .alloc_inode = ext2_alloc_inode, + .destroy_inode = ext2_destroy_inode, + .read_inode = ext2_read_inode, + .write_inode = ext2_write_inode, + .delete_inode = ext2_delete_inode, + .put_super = ext2_put_super, + .write_super = ext2_write_super, + .statfs = ext2_statfs, + .remount_fs = ext2_remount, + .clear_inode = ext2_clear_inode, +#ifdef CONFIG_QUOTA + .quota_read = ext2_quota_read, + .quota_write = ext2_quota_write, +#endif +}; + +/* Yes, most of these are left as NULL!! + * A NULL value implies the default, which works with ext2-like file + * systems, but can be improved upon. + * Currently only get_parent is required. + */ +struct dentry *ext2_get_parent(struct dentry *child); +static struct export_operations ext2_export_ops = { + .get_parent = ext2_get_parent, +}; + +static unsigned long get_sb_block(void **data) +{ + unsigned long sb_block; + char *options = (char *) *data; + + if (!options || strncmp(options, "sb=", 3) != 0) + return 1; /* Default location */ + options += 3; + sb_block = simple_strtoul(options, &options, 0); + if (*options && *options != ',') { + printk("EXT2-fs: Invalid sb specification: %s\n", + (char *) *data); + return 1; + } + if (*options == ',') + options++; + *data = (void *) options; + return sb_block; +} + +enum { + Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_ignore, Opt_err, +}; + +static match_table_t tokens = { + {Opt_bsd_df, "bsddf"}, + {Opt_minix_df, "minixdf"}, + {Opt_grpid, "grpid"}, + {Opt_grpid, "bsdgroups"}, + {Opt_nogrpid, "nogrpid"}, + {Opt_nogrpid, "sysvgroups"}, + {Opt_resgid, "resgid=%u"}, + {Opt_resuid, "resuid=%u"}, + {Opt_sb, "sb=%u"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_nouid32, "nouid32"}, + {Opt_nocheck, "check=none"}, + {Opt_nocheck, "nocheck"}, + {Opt_check, "check"}, + {Opt_debug, "debug"}, + {Opt_oldalloc, "oldalloc"}, + {Opt_orlov, "orlov"}, + {Opt_nobh, "nobh"}, + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_ignore, "grpquota"}, + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_ignore, "usrquota"}, + {Opt_err, NULL} +}; + +static int parse_options (char * options, + struct ext2_sb_info *sbi) +{ + char * p; + substring_t args[MAX_OPT_ARGS]; + unsigned long kind = EXT2_MOUNT_ERRORS_CONT; + int option; + + if (!options) + return 1; + + while ((p = strsep (&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_bsd_df: + clear_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_minix_df: + set_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_grpid: + set_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_nogrpid: + clear_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_resuid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resuid = option; + break; + case Opt_resgid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resgid = option; + break; + case Opt_sb: + /* handled by get_sb_block() instead of here */ + /* *sb_block = match_int(&args[0]); */ + break; + case Opt_err_panic: + kind = EXT2_MOUNT_ERRORS_PANIC; + break; + case Opt_err_ro: + kind = EXT2_MOUNT_ERRORS_RO; + break; + case Opt_err_cont: + kind = EXT2_MOUNT_ERRORS_CONT; + break; + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; + case Opt_check: +#ifdef CONFIG_EXT2_CHECK + set_opt (sbi->s_mount_opt, CHECK); +#else + printk("EXT2 Check option not supported\n"); +#endif + break; + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; + case Opt_debug: + set_opt (sbi->s_mount_opt, DEBUG); + break; + case Opt_oldalloc: + set_opt (sbi->s_mount_opt, OLDALLOC); + break; + case Opt_orlov: + clear_opt (sbi->s_mount_opt, OLDALLOC); + break; + case Opt_nobh: + set_opt (sbi->s_mount_opt, NOBH); + break; +#ifdef CONFIG_EXT2_FS_XATTR + case Opt_user_xattr: + set_opt (sbi->s_mount_opt, XATTR_USER); + break; + case Opt_nouser_xattr: + clear_opt (sbi->s_mount_opt, XATTR_USER); + break; +#else + case Opt_user_xattr: + case Opt_nouser_xattr: + printk("EXT2 (no)user_xattr options not supported\n"); + break; +#endif +#ifdef CONFIG_EXT2_FS_POSIX_ACL + case Opt_acl: + set_opt(sbi->s_mount_opt, POSIX_ACL); + break; + case Opt_noacl: + clear_opt(sbi->s_mount_opt, POSIX_ACL); + break; +#else + case Opt_acl: + case Opt_noacl: + printk("EXT2 (no)acl options not supported\n"); + break; +#endif + case Opt_ignore: + break; + default: + return 0; + } + } + sbi->s_mount_opt |= kind; + return 1; +} + +static int ext2_setup_super (struct super_block * sb, + struct ext2_super_block * es, + int read_only) +{ + int res = 0; + struct ext2_sb_info *sbi = EXT2_SB(sb); + + if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { + printk ("EXT2-fs warning: revision level too high, " + "forcing read-only mode\n"); + res = MS_RDONLY; + } + if (read_only) + return res; + if (!(sbi->s_mount_state & EXT2_VALID_FS)) + printk ("EXT2-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); + else if ((sbi->s_mount_state & EXT2_ERROR_FS)) + printk ("EXT2-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); + else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && + le16_to_cpu(es->s_mnt_count) >= + (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) + printk ("EXT2-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); + else if (le32_to_cpu(es->s_checkinterval) && + (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) + printk ("EXT2-fs warning: checktime reached, " + "running e2fsck is recommended\n"); + if (!le16_to_cpu(es->s_max_mnt_count)) + es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + ext2_write_super(sb); + if (test_opt (sb, DEBUG)) + printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " + "bpg=%lu, ipg=%lu, mo=%04lx]\n", + EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, + sbi->s_frag_size, + sbi->s_groups_count, + EXT2_BLOCKS_PER_GROUP(sb), + EXT2_INODES_PER_GROUP(sb), + sbi->s_mount_opt); +#ifdef CONFIG_EXT2_CHECK + if (test_opt (sb, CHECK)) { + ext2_check_blocks_bitmap (sb); + ext2_check_inodes_bitmap (sb); + } +#endif + return res; +} + +static int ext2_check_descriptors (struct super_block * sb) +{ + int i; + int desc_block = 0; + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); + struct ext2_group_desc * gdp = NULL; + + ext2_debug ("Checking group descriptors"); + + for (i = 0; i < sbi->s_groups_count; i++) + { + if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; + if (le32_to_cpu(gdp->bg_block_bitmap) < block || + le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Block bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap)); + return 0; + } + if (le32_to_cpu(gdp->bg_inode_bitmap) < block || + le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Inode bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap)); + return 0; + } + if (le32_to_cpu(gdp->bg_inode_table) < block || + le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= + block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Inode table for group %d" + " not in group (block %lu)!", + i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); + return 0; + } + block += EXT2_BLOCKS_PER_GROUP(sb); + gdp++; + } + return 1; +} + +#define log2(n) ffz(~(n)) + +/* + * Maximal file size. There is a direct, and {,double-,triple-}indirect + * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. + * We need to be 1 filesystem block less than the 2^32 sector limit. + */ +static loff_t ext2_max_size(int bits) +{ + loff_t res = EXT2_NDIR_BLOCKS; + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of + * sectors in the file, including data and all indirect blocks, + * does not exceed 2^32. */ + const loff_t upper_limit = 0x1ff7fffd000LL; + + res += 1LL << (bits-2); + res += 1LL << (2*(bits-2)); + res += 1LL << (3*(bits-2)); + res <<= bits; + if (res > upper_limit) + res = upper_limit; + return res; +} + +static unsigned long descriptor_loc(struct super_block *sb, + unsigned long logic_sb_block, + int nr) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned long bg, first_data_block, first_meta_bg; + int has_super = 0; + + first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block); + first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); + + if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) || + nr < first_meta_bg) + return (logic_sb_block + nr + 1); + bg = sbi->s_desc_per_block * nr; + if (ext2_bg_has_super(sb, bg)) + has_super = 1; + return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); +} + +static int ext2_fill_super(struct super_block *sb, void *data, int silent) +{ + struct buffer_head * bh; + struct ext2_sb_info * sbi; + struct ext2_super_block * es; + struct inode *root; + unsigned long block; + unsigned long sb_block = get_sb_block(&data); + unsigned long logic_sb_block; + unsigned long offset = 0; + unsigned long def_mount_opts; + int blocksize = BLOCK_SIZE; + int db_count; + int i, j; + __le32 features; + + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->s_fs_info = sbi; + memset(sbi, 0, sizeof(*sbi)); + + /* + * See what the current blocksize for the device is, and + * use that as the blocksize. Otherwise (or if the blocksize + * is smaller than the default) use the default. + * This is important for devices that have a hardware + * sectorsize that is larger than the default. + */ + blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + if (!blocksize) { + printk ("EXT2-fs: unable to set blocksize\n"); + goto failed_sbi; + } + + /* + * If the superblock doesn't start on a hardware sector boundary, + * calculate the offset. + */ + if (blocksize != BLOCK_SIZE) { + logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; + offset = (sb_block*BLOCK_SIZE) % blocksize; + } else { + logic_sb_block = sb_block; + } + + if (!(bh = sb_bread(sb, logic_sb_block))) { + printk ("EXT2-fs: unable to read superblock\n"); + goto failed_sbi; + } + /* + * Note: s_es must be initialized as soon as possible because + * some ext2 macro-instructions depend on its value + */ + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; + sb->s_magic = le16_to_cpu(es->s_magic); + + if (sb->s_magic != EXT2_SUPER_MAGIC) + goto cantfind_ext2; + + /* Set defaults before we parse the mount options */ + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + if (def_mount_opts & EXT2_DEFM_DEBUG) + set_opt(sbi->s_mount_opt, DEBUG); + if (def_mount_opts & EXT2_DEFM_BSDGROUPS) + set_opt(sbi->s_mount_opt, GRPID); + if (def_mount_opts & EXT2_DEFM_UID16) + set_opt(sbi->s_mount_opt, NO_UID32); + if (def_mount_opts & EXT2_DEFM_XATTR_USER) + set_opt(sbi->s_mount_opt, XATTR_USER); + if (def_mount_opts & EXT2_DEFM_ACL) + set_opt(sbi->s_mount_opt, POSIX_ACL); + + if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) + set_opt(sbi->s_mount_opt, ERRORS_PANIC); + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) + set_opt(sbi->s_mount_opt, ERRORS_RO); + + sbi->s_resuid = le16_to_cpu(es->s_def_resuid); + sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + + if (!parse_options ((char *) data, sbi)) + goto failed_mount; + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? + MS_POSIXACL : 0); + + if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && + (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || + EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || + EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U))) + printk("EXT2-fs warning: feature flags set on rev 0 fs, " + "running e2fsck is recommended\n"); + /* + * Check feature flags regardless of the revision level, since we + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. + */ + features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); + if (features) { + printk("EXT2-fs: %s: couldn't mount because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + if (!(sb->s_flags & MS_RDONLY) && + (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + printk("EXT2-fs: %s: couldn't mount RDWR because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + + blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + + /* If the blocksize doesn't match, re-read the thing.. */ + if (sb->s_blocksize != blocksize) { + brelse(bh); + + if (!sb_set_blocksize(sb, blocksize)) { + printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n"); + goto failed_sbi; + } + + logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; + offset = (sb_block*BLOCK_SIZE) % blocksize; + bh = sb_bread(sb, logic_sb_block); + if(!bh) { + printk("EXT2-fs: Couldn't read superblock on " + "2nd try.\n"); + goto failed_sbi; + } + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; + if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk ("EXT2-fs: Magic mismatch, very weird !\n"); + goto failed_mount; + } + } + + sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); + + if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { + sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) || + (sbi->s_inode_size & (sbi->s_inode_size - 1)) || + (sbi->s_inode_size > blocksize)) { + printk ("EXT2-fs: unsupported inode size: %d\n", + sbi->s_inode_size); + goto failed_mount; + } + } + + sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << + le32_to_cpu(es->s_log_frag_size); + if (sbi->s_frag_size == 0) + goto cantfind_ext2; + sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; + + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); + sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); + sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); + + if (EXT2_INODE_SIZE(sb) == 0) + goto cantfind_ext2; + sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); + if (sbi->s_inodes_per_block == 0) + goto cantfind_ext2; + sbi->s_itb_per_group = sbi->s_inodes_per_group / + sbi->s_inodes_per_block; + sbi->s_desc_per_block = sb->s_blocksize / + sizeof (struct ext2_group_desc); + sbi->s_sbh = bh; + sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_addr_per_block_bits = + log2 (EXT2_ADDR_PER_BLOCK(sb)); + sbi->s_desc_per_block_bits = + log2 (EXT2_DESC_PER_BLOCK(sb)); + + if (sb->s_magic != EXT2_SUPER_MAGIC) + goto cantfind_ext2; + + if (sb->s_blocksize != bh->b_size) { + if (!silent) + printk ("VFS: Unsupported blocksize on dev " + "%s.\n", sb->s_id); + goto failed_mount; + } + + if (sb->s_blocksize != sbi->s_frag_size) { + printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", + sbi->s_frag_size, sb->s_blocksize); + goto failed_mount; + } + + if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { + printk ("EXT2-fs: #blocks per group too big: %lu\n", + sbi->s_blocks_per_group); + goto failed_mount; + } + if (sbi->s_frags_per_group > sb->s_blocksize * 8) { + printk ("EXT2-fs: #fragments per group too big: %lu\n", + sbi->s_frags_per_group); + goto failed_mount; + } + if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { + printk ("EXT2-fs: #inodes per group too big: %lu\n", + sbi->s_inodes_per_group); + goto failed_mount; + } + + if (EXT2_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext2; + sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) + + EXT2_BLOCKS_PER_GROUP(sb) - 1) / + EXT2_BLOCKS_PER_GROUP(sb); + db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); + if (sbi->s_group_desc == NULL) { + printk ("EXT2-fs: not enough memory\n"); + goto failed_mount; + } + percpu_counter_init(&sbi->s_freeblocks_counter); + percpu_counter_init(&sbi->s_freeinodes_counter); + percpu_counter_init(&sbi->s_dirs_counter); + bgl_lock_init(&sbi->s_blockgroup_lock); + sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), + GFP_KERNEL); + if (!sbi->s_debts) { + printk ("EXT2-fs: not enough memory\n"); + goto failed_mount_group_desc; + } + memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts)); + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logic_sb_block, i); + sbi->s_group_desc[i] = sb_bread(sb, block); + if (!sbi->s_group_desc[i]) { + for (j = 0; j < i; j++) + brelse (sbi->s_group_desc[j]); + printk ("EXT2-fs: unable to read group descriptors\n"); + goto failed_mount_group_desc; + } + } + if (!ext2_check_descriptors (sb)) { + printk ("EXT2-fs: group descriptors corrupted!\n"); + db_count = i; + goto failed_mount2; + } + sbi->s_gdb_count = db_count; + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + spin_lock_init(&sbi->s_next_gen_lock); + /* + * set up enough so that it can read an inode + */ + sb->s_op = &ext2_sops; + sb->s_export_op = &ext2_export_ops; + sb->s_xattr = ext2_xattr_handlers; + root = iget(sb, EXT2_ROOT_INO); + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + goto failed_mount2; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + dput(sb->s_root); + sb->s_root = NULL; + printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); + goto failed_mount2; + } + if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) + ext2_warning(sb, __FUNCTION__, + "mounting ext3 filesystem as ext2\n"); + ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); + percpu_counter_mod(&sbi->s_freeblocks_counter, + ext2_count_free_blocks(sb)); + percpu_counter_mod(&sbi->s_freeinodes_counter, + ext2_count_free_inodes(sb)); + percpu_counter_mod(&sbi->s_dirs_counter, + ext2_count_dirs(sb)); + return 0; + +cantfind_ext2: + if (!silent) + printk("VFS: Can't find an ext2 filesystem on dev %s.\n", + sb->s_id); + goto failed_mount; + +failed_mount2: + for (i = 0; i < db_count; i++) + brelse(sbi->s_group_desc[i]); +failed_mount_group_desc: + kfree(sbi->s_group_desc); + kfree(sbi->s_debts); +failed_mount: + brelse(bh); +failed_sbi: + sb->s_fs_info = NULL; + kfree(sbi); + return -EINVAL; +} + +static void ext2_commit_super (struct super_block * sb, + struct ext2_super_block * es) +{ + es->s_wtime = cpu_to_le32(get_seconds()); + mark_buffer_dirty(EXT2_SB(sb)->s_sbh); + sb->s_dirt = 0; +} + +static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) +{ + es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); + es->s_wtime = cpu_to_le32(get_seconds()); + mark_buffer_dirty(EXT2_SB(sb)->s_sbh); + sync_dirty_buffer(EXT2_SB(sb)->s_sbh); + sb->s_dirt = 0; +} + +/* + * In the second extended file system, it is not necessary to + * write the super block since we use a mapping of the + * disk super block in a buffer. + * + * However, this function is still used to set the fs valid + * flags to 0. We need to set this flag to 0 since the fs + * may have been checked while mounted and e2fsck may have + * set s_state to EXT2_VALID_FS after some corrections. + */ + +void ext2_write_super (struct super_block * sb) +{ + struct ext2_super_block * es; + lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) { + es = EXT2_SB(sb)->s_es; + + if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) { + ext2_debug ("setting valid to 0\n"); + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & + ~EXT2_VALID_FS); + es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); + es->s_mtime = cpu_to_le32(get_seconds()); + ext2_sync_super(sb, es); + } else + ext2_commit_super (sb, es); + } + sb->s_dirt = 0; + unlock_kernel(); +} + +static int ext2_remount (struct super_block * sb, int * flags, char * data) +{ + struct ext2_sb_info * sbi = EXT2_SB(sb); + struct ext2_super_block * es; + + /* + * Allow the "check" option to be passed as a remount option. + */ + if (!parse_options (data, sbi)) + return -EINVAL; + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + + es = sbi->s_es; + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + if (*flags & MS_RDONLY) { + if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || + !(sbi->s_mount_state & EXT2_VALID_FS)) + return 0; + /* + * OK, we are remounting a valid rw partition rdonly, so set + * the rdonly flag and then mark the partition as valid again. + */ + es->s_state = cpu_to_le16(sbi->s_mount_state); + es->s_mtime = cpu_to_le32(get_seconds()); + } else { + __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, + ~EXT2_FEATURE_RO_COMPAT_SUPP); + if (ret) { + printk("EXT2-fs: %s: couldn't remount RDWR because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(ret)); + return -EROFS; + } + /* + * Mounting a RDONLY partition read-write, so reread and + * store the current valid flag. (It may have been changed + * by e2fsck since we originally mounted the partition.) + */ + sbi->s_mount_state = le16_to_cpu(es->s_state); + if (!ext2_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; + } + ext2_sync_super(sb, es); + return 0; +} + +static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned long overhead; + int i; + + if (test_opt (sb, MINIX_DF)) + overhead = 0; + else { + /* + * Compute the overhead (FS structures) + */ + + /* + * All of the blocks before first_data_block are + * overhead + */ + overhead = le32_to_cpu(sbi->s_es->s_first_data_block); + + /* + * Add the overhead attributed to the superblock and + * block group descriptors. If the sparse superblocks + * feature is turned on, then not all groups have this. + */ + for (i = 0; i < sbi->s_groups_count; i++) + overhead += ext2_bg_has_super(sb, i) + + ext2_bg_num_gdb(sb, i); + + /* + * Every block group has an inode bitmap, a block + * bitmap, and an inode table. + */ + overhead += (sbi->s_groups_count * + (2 + sbi->s_itb_per_group)); + } + + buf->f_type = EXT2_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead; + buf->f_bfree = ext2_count_free_blocks(sb); + buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count); + if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count)) + buf->f_bavail = 0; + buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); + buf->f_ffree = ext2_count_free_inodes (sb); + buf->f_namelen = EXT2_NAME_LEN; + return 0; +} + +static struct super_block *ext2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); +} + +#ifdef CONFIG_QUOTA + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) + * we don't have to be afraid of races */ +static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head tmp_bh; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + + tmp_bh.b_state = 0; + err = ext2_get_block(inode, blk, &tmp_bh, 0); + if (err) + return err; + if (!buffer_mapped(&tmp_bh)) /* A hole? */ + memset(data, 0, tocopy); + else { + bh = sb_bread(sb, tmp_bh.b_blocknr); + if (!bh) + return -EIO; + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + } + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t ext2_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t towrite = len; + struct buffer_head tmp_bh; + struct buffer_head *bh; + + down(&inode->i_sem); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? + sb->s_blocksize - offset : towrite; + + tmp_bh.b_state = 0; + err = ext2_get_block(inode, blk, &tmp_bh, 1); + if (err) + goto out; + if (offset || tocopy != EXT2_BLOCK_SIZE(sb)) + bh = sb_bread(sb, tmp_bh.b_blocknr); + else + bh = sb_getblk(sb, tmp_bh.b_blocknr); + if (!bh) { + err = -EIO; + goto out; + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + brelse(bh); + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) + return err; + if (inode->i_size < off+len-towrite) + i_size_write(inode, off+len-towrite); + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + up(&inode->i_sem); + return len - towrite; +} + +#endif + +static struct file_system_type ext2_fs_type = { + .owner = THIS_MODULE, + .name = "ext2", + .get_sb = ext2_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_ext2_fs(void) +{ + int err = init_ext2_xattr(); + if (err) + return err; + err = init_inodecache(); + if (err) + goto out1; + err = register_filesystem(&ext2_fs_type); + if (err) + goto out; + return 0; +out: + destroy_inodecache(); +out1: + exit_ext2_xattr(); + return err; +} + +static void __exit exit_ext2_fs(void) +{ + unregister_filesystem(&ext2_fs_type); + destroy_inodecache(); + exit_ext2_xattr(); +} + +module_init(init_ext2_fs) +module_exit(exit_ext2_fs) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c new file mode 100644 index 000000000000..9f7bac01d557 --- /dev/null +++ b/fs/ext2/symlink.c @@ -0,0 +1,52 @@ +/* + * linux/fs/ext2/symlink.c + * + * Only fast symlinks left here - the rest is done by generic code. AV, 1999 + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/symlink.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 symlink handling code + */ + +#include "ext2.h" +#include "xattr.h" +#include <linux/namei.h> + +static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); + nd_set_link(nd, (char *)ei->i_data); + return 0; +} + +struct inode_operations ext2_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +#ifdef CONFIG_EXT2_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext2_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +struct inode_operations ext2_fast_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ext2_follow_link, +#ifdef CONFIG_EXT2_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext2_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c new file mode 100644 index 000000000000..27982b500e84 --- /dev/null +++ b/fs/ext2/xattr.c @@ -0,0 +1,1043 @@ +/* + * linux/fs/ext2/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de> + * + * Fix by Harrison Xing <harrison@mountainviewdata.com>. + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko <luka.renko@hermes.si>. + * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, + * Red Hat Inc. + * + */ + +/* + * Extended attributes are stored on disk blocks allocated outside of + * any inode. The i_file_acl field is then made to point to this allocated + * block. If all extended attributes of an inode are identical, these + * inodes may share the same extended attribute block. Such situations + * are automatically detected by keeping a cache of recent attribute block + * numbers and hashes over the block's contents in memory. + * + * + * Extended attribute block layout: + * + * +------------------+ + * | header | + * | entry 1 | | + * | entry 2 | | growing downwards + * | entry 3 | v + * | four null bytes | + * | . . . | + * | value 1 | ^ + * | value 3 | | growing upwards + * | value 2 | | + * +------------------+ + * + * The block header is followed by multiple entry descriptors. These entry + * descriptors are variable in size, and alligned to EXT2_XATTR_PAD + * byte boundaries. The entry descriptors are sorted by attribute name, + * so that two extended attribute blocks can be compared efficiently. + * + * Attribute values are aligned to the end of the block, stored in + * no specific order. They are also padded to EXT2_XATTR_PAD byte + * boundaries. No additional gaps are left between them. + * + * Locking strategy + * ---------------- + * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. + */ + +#include <linux/buffer_head.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/mbcache.h> +#include <linux/quotaops.h> +#include <linux/rwsem.h> +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) +#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) +#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) +#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#ifdef EXT2_XATTR_DEBUG +# define ea_idebug(inode, f...) do { \ + printk(KERN_DEBUG "inode %s:%ld: ", \ + inode->i_sb->s_id, inode->i_ino); \ + printk(f); \ + printk("\n"); \ + } while (0) +# define ea_bdebug(bh, f...) do { \ + char b[BDEVNAME_SIZE]; \ + printk(KERN_DEBUG "block %s:%lu: ", \ + bdevname(bh->b_bdev, b), \ + (unsigned long) bh->b_blocknr); \ + printk(f); \ + printk("\n"); \ + } while (0) +#else +# define ea_idebug(f...) +# define ea_bdebug(f...) +#endif + +static int ext2_xattr_set2(struct inode *, struct buffer_head *, + struct ext2_xattr_header *); + +static int ext2_xattr_cache_insert(struct buffer_head *); +static struct buffer_head *ext2_xattr_cache_find(struct inode *, + struct ext2_xattr_header *); +static void ext2_xattr_rehash(struct ext2_xattr_header *, + struct ext2_xattr_entry *); + +static struct mb_cache *ext2_xattr_cache; + +static struct xattr_handler *ext2_xattr_handler_map[] = { + [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, +#ifdef CONFIG_EXT2_FS_POSIX_ACL + [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, + [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler, +#endif + [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler, +#ifdef CONFIG_EXT2_FS_SECURITY + [EXT2_XATTR_INDEX_SECURITY] = &ext2_xattr_security_handler, +#endif +}; + +struct xattr_handler *ext2_xattr_handlers[] = { + &ext2_xattr_user_handler, + &ext2_xattr_trusted_handler, +#ifdef CONFIG_EXT2_FS_POSIX_ACL + &ext2_xattr_acl_access_handler, + &ext2_xattr_acl_default_handler, +#endif +#ifdef CONFIG_EXT2_FS_SECURITY + &ext2_xattr_security_handler, +#endif + NULL +}; + +static inline struct xattr_handler * +ext2_xattr_handler(int name_index) +{ + struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) + handler = ext2_xattr_handler_map[name_index]; + return handler; +} + +/* + * ext2_xattr_get() + * + * Copy an extended attribute into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +int +ext2_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + struct ext2_xattr_entry *entry; + size_t name_len, size; + char *end; + int error; + + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", + name_index, name, buffer, (long)buffer_size); + + if (name == NULL) + return -EINVAL; + down_read(&EXT2_I(inode)->xattr_sem); + error = -ENODATA; + if (!EXT2_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); + end = bh->b_data + bh->b_size; + if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + HDR(bh)->h_blocks != cpu_to_le32(1)) { +bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", + "inode %ld: bad block %d", inode->i_ino, + EXT2_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + /* find named attribute */ + name_len = strlen(name); + + error = -ERANGE; + if (name_len > 255) + goto cleanup; + entry = FIRST_ENTRY(bh); + while (!IS_LAST_ENTRY(entry)) { + struct ext2_xattr_entry *next = + EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + goto bad_block; + if (name_index == entry->e_name_index && + name_len == entry->e_name_len && + memcmp(name, entry->e_name, name_len) == 0) + goto found; + entry = next; + } + /* Check the remaining name entries */ + while (!IS_LAST_ENTRY(entry)) { + struct ext2_xattr_entry *next = + EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + goto bad_block; + entry = next; + } + if (ext2_xattr_cache_insert(bh)) + ea_idebug(inode, "cache insert failed"); + error = -ENODATA; + goto cleanup; +found: + /* check the buffer size */ + if (entry->e_value_block != 0) + goto bad_block; + size = le32_to_cpu(entry->e_value_size); + if (size > inode->i_sb->s_blocksize || + le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) + goto bad_block; + + if (ext2_xattr_cache_insert(bh)) + ea_idebug(inode, "cache insert failed"); + if (buffer) { + error = -ERANGE; + if (size > buffer_size) + goto cleanup; + /* return value of attribute */ + memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), + size); + } + error = size; + +cleanup: + brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); + + return error; +} + +/* + * ext2_xattr_list() + * + * Copy a list of attribute names into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +static int +ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + struct ext2_xattr_entry *entry; + char *end; + size_t rest = buffer_size; + int error; + + ea_idebug(inode, "buffer=%p, buffer_size=%ld", + buffer, (long)buffer_size); + + down_read(&EXT2_I(inode)->xattr_sem); + error = 0; + if (!EXT2_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); + end = bh->b_data + bh->b_size; + if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + HDR(bh)->h_blocks != cpu_to_le32(1)) { +bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", + "inode %ld: bad block %d", inode->i_ino, + EXT2_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + + /* check the on-disk data structure */ + entry = FIRST_ENTRY(bh); + while (!IS_LAST_ENTRY(entry)) { + struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); + + if ((char *)next >= end) + goto bad_block; + entry = next; + } + if (ext2_xattr_cache_insert(bh)) + ea_idebug(inode, "cache insert failed"); + + /* list the attribute names */ + for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); + entry = EXT2_XATTR_NEXT(entry)) { + struct xattr_handler *handler = + ext2_xattr_handler(entry->e_name_index); + + if (handler) { + size_t size = handler->list(inode, buffer, rest, + entry->e_name, + entry->e_name_len); + if (buffer) { + if (size > rest) { + error = -ERANGE; + goto cleanup; + } + buffer += size; + } + rest -= size; + } + } + error = buffer_size - rest; /* total size */ + +cleanup: + brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); + + return error; +} + +/* + * Inode operation listxattr() + * + * dentry->d_inode->i_sem: don't care + */ +ssize_t +ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + return ext2_xattr_list(dentry->d_inode, buffer, size); +} + +/* + * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is + * not set, set it. + */ +static void ext2_xattr_update_super_block(struct super_block *sb) +{ + if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) + return; + + lock_super(sb); + EXT2_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + mark_buffer_dirty(EXT2_SB(sb)->s_sbh); + unlock_super(sb); +} + +/* + * ext2_xattr_set() + * + * Create, replace or remove an extended attribute for this inode. Buffer + * is NULL to remove an existing extended attribute, and non-NULL to + * either replace an existing extended attribute, or create a new extended + * attribute. The flags XATTR_REPLACE and XATTR_CREATE + * specify that an extended attribute must exist and must not exist + * previous to the call, respectively. + * + * Returns 0, or a negative error number on failure. + */ +int +ext2_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, int flags) +{ + struct super_block *sb = inode->i_sb; + struct buffer_head *bh = NULL; + struct ext2_xattr_header *header = NULL; + struct ext2_xattr_entry *here, *last; + size_t name_len, free, min_offs = sb->s_blocksize; + int not_found = 1, error; + char *end; + + /* + * header -- Points either into bh, or to a temporarily + * allocated buffer. + * here -- The named entry found, or the place for inserting, within + * the block pointed to by header. + * last -- Points right after the last named entry within the block + * pointed to by header. + * min_offs -- The offset of the first value (values are aligned + * towards the end of the block). + * end -- Points right after the block pointed to by header. + */ + + ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", + name_index, name, value, (long)value_len); + + if (IS_RDONLY(inode)) + return -EROFS; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + if (value == NULL) + value_len = 0; + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + if (name_len > 255 || value_len > sb->s_blocksize) + return -ERANGE; + down_write(&EXT2_I(inode)->xattr_sem); + if (EXT2_I(inode)->i_file_acl) { + /* The inode already has an extended attribute block. */ + bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), + le32_to_cpu(HDR(bh)->h_refcount)); + header = HDR(bh); + end = bh->b_data + bh->b_size; + if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + header->h_blocks != cpu_to_le32(1)) { +bad_block: ext2_error(sb, "ext2_xattr_set", + "inode %ld: bad block %d", inode->i_ino, + EXT2_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + /* Find the named attribute. */ + here = FIRST_ENTRY(bh); + while (!IS_LAST_ENTRY(here)) { + struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); + if ((char *)next >= end) + goto bad_block; + if (!here->e_value_block && here->e_value_size) { + size_t offs = le16_to_cpu(here->e_value_offs); + if (offs < min_offs) + min_offs = offs; + } + not_found = name_index - here->e_name_index; + if (!not_found) + not_found = name_len - here->e_name_len; + if (!not_found) + not_found = memcmp(name, here->e_name,name_len); + if (not_found <= 0) + break; + here = next; + } + last = here; + /* We still need to compute min_offs and last. */ + while (!IS_LAST_ENTRY(last)) { + struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); + if ((char *)next >= end) + goto bad_block; + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < min_offs) + min_offs = offs; + } + last = next; + } + + /* Check whether we have enough space left. */ + free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); + } else { + /* We will use a new extended attribute block. */ + free = sb->s_blocksize - + sizeof(struct ext2_xattr_header) - sizeof(__u32); + here = last = NULL; /* avoid gcc uninitialized warning. */ + } + + if (not_found) { + /* Request to remove a nonexistent attribute? */ + error = -ENODATA; + if (flags & XATTR_REPLACE) + goto cleanup; + error = 0; + if (value == NULL) + goto cleanup; + } else { + /* Request to create an existing attribute? */ + error = -EEXIST; + if (flags & XATTR_CREATE) + goto cleanup; + if (!here->e_value_block && here->e_value_size) { + size_t size = le32_to_cpu(here->e_value_size); + + if (le16_to_cpu(here->e_value_offs) + size > + sb->s_blocksize || size > sb->s_blocksize) + goto bad_block; + free += EXT2_XATTR_SIZE(size); + } + free += EXT2_XATTR_LEN(name_len); + } + error = -ENOSPC; + if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len)) + goto cleanup; + + /* Here we know that we can set the new attribute. */ + + if (header) { + struct mb_cache_entry *ce; + + /* assert(header == HDR(bh)); */ + ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, + bh->b_blocknr); + lock_buffer(bh); + if (header->h_refcount == cpu_to_le32(1)) { + ea_bdebug(bh, "modifying in-place"); + if (ce) + mb_cache_entry_free(ce); + /* keep the buffer locked while modifying it. */ + } else { + int offset; + + if (ce) + mb_cache_entry_release(ce); + unlock_buffer(bh); + ea_bdebug(bh, "cloning"); + header = kmalloc(bh->b_size, GFP_KERNEL); + error = -ENOMEM; + if (header == NULL) + goto cleanup; + memcpy(header, HDR(bh), bh->b_size); + header->h_refcount = cpu_to_le32(1); + + offset = (char *)here - bh->b_data; + here = ENTRY((char *)header + offset); + offset = (char *)last - bh->b_data; + last = ENTRY((char *)header + offset); + } + } else { + /* Allocate a buffer where we construct the new block. */ + header = kmalloc(sb->s_blocksize, GFP_KERNEL); + error = -ENOMEM; + if (header == NULL) + goto cleanup; + memset(header, 0, sb->s_blocksize); + end = (char *)header + sb->s_blocksize; + header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); + header->h_blocks = header->h_refcount = cpu_to_le32(1); + last = here = ENTRY(header+1); + } + + /* Iff we are modifying the block in-place, bh is locked here. */ + + if (not_found) { + /* Insert the new name. */ + size_t size = EXT2_XATTR_LEN(name_len); + size_t rest = (char *)last - (char *)here; + memmove((char *)here + size, here, rest); + memset(here, 0, size); + here->e_name_index = name_index; + here->e_name_len = name_len; + memcpy(here->e_name, name, name_len); + } else { + if (!here->e_value_block && here->e_value_size) { + char *first_val = (char *)header + min_offs; + size_t offs = le16_to_cpu(here->e_value_offs); + char *val = (char *)header + offs; + size_t size = EXT2_XATTR_SIZE( + le32_to_cpu(here->e_value_size)); + + if (size == EXT2_XATTR_SIZE(value_len)) { + /* The old and the new value have the same + size. Just replace. */ + here->e_value_size = cpu_to_le32(value_len); + memset(val + size - EXT2_XATTR_PAD, 0, + EXT2_XATTR_PAD); /* Clear pad bytes. */ + memcpy(val, value, value_len); + goto skip_replace; + } + + /* Remove the old value. */ + memmove(first_val + size, first_val, val - first_val); + memset(first_val, 0, size); + here->e_value_offs = 0; + min_offs += size; + + /* Adjust all value offsets. */ + last = ENTRY(header+1); + while (!IS_LAST_ENTRY(last)) { + size_t o = le16_to_cpu(last->e_value_offs); + if (!last->e_value_block && o < offs) + last->e_value_offs = + cpu_to_le16(o + size); + last = EXT2_XATTR_NEXT(last); + } + } + if (value == NULL) { + /* Remove the old name. */ + size_t size = EXT2_XATTR_LEN(name_len); + last = ENTRY((char *)last - size); + memmove(here, (char*)here + size, + (char*)last - (char*)here); + memset(last, 0, size); + } + } + + if (value != NULL) { + /* Insert the new value. */ + here->e_value_size = cpu_to_le32(value_len); + if (value_len) { + size_t size = EXT2_XATTR_SIZE(value_len); + char *val = (char *)header + min_offs - size; + here->e_value_offs = + cpu_to_le16((char *)val - (char *)header); + memset(val + size - EXT2_XATTR_PAD, 0, + EXT2_XATTR_PAD); /* Clear the pad bytes. */ + memcpy(val, value, value_len); + } + } + +skip_replace: + if (IS_LAST_ENTRY(ENTRY(header+1))) { + /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ + error = ext2_xattr_set2(inode, bh, NULL); + } else { + ext2_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ + error = ext2_xattr_set2(inode, bh, header); + } + +cleanup: + brelse(bh); + if (!(bh && header == HDR(bh))) + kfree(header); + up_write(&EXT2_I(inode)->xattr_sem); + + return error; +} + +/* + * Second half of ext2_xattr_set(): Update the file system. + */ +static int +ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, + struct ext2_xattr_header *header) +{ + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh = NULL; + int error; + + if (header) { + new_bh = ext2_xattr_cache_find(inode, header); + if (new_bh) { + /* We found an identical block in the cache. */ + if (new_bh == old_bh) { + ea_bdebug(new_bh, "keeping this block"); + } else { + /* The old block is released after updating + the inode. */ + ea_bdebug(new_bh, "reusing block"); + + error = -EDQUOT; + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); + goto cleanup; + } + HDR(new_bh)->h_refcount = cpu_to_le32(1 + + le32_to_cpu(HDR(new_bh)->h_refcount)); + ea_bdebug(new_bh, "refcount now=%d", + le32_to_cpu(HDR(new_bh)->h_refcount)); + } + unlock_buffer(new_bh); + } else if (old_bh && header == HDR(old_bh)) { + /* Keep this block. No need to lock the block as we + don't need to change the reference count. */ + new_bh = old_bh; + get_bh(new_bh); + ext2_xattr_cache_insert(new_bh); + } else { + /* We need to allocate a new block */ + int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> + s_first_data_block) + + EXT2_I(inode)->i_block_group * + EXT2_BLOCKS_PER_GROUP(sb); + int block = ext2_new_block(inode, goal, + NULL, NULL, &error); + if (error) + goto cleanup; + ea_idebug(inode, "creating block %d", block); + + new_bh = sb_getblk(sb, block); + if (!new_bh) { + ext2_free_blocks(inode, block, 1); + error = -EIO; + goto cleanup; + } + lock_buffer(new_bh); + memcpy(new_bh->b_data, header, new_bh->b_size); + set_buffer_uptodate(new_bh); + unlock_buffer(new_bh); + ext2_xattr_cache_insert(new_bh); + + ext2_xattr_update_super_block(sb); + } + mark_buffer_dirty(new_bh); + if (IS_SYNC(inode)) { + sync_dirty_buffer(new_bh); + error = -EIO; + if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) + goto cleanup; + } + } + + /* Update the inode. */ + EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; + inode->i_ctime = CURRENT_TIME_SEC; + if (IS_SYNC(inode)) { + error = ext2_sync_inode (inode); + /* In case sync failed due to ENOSPC the inode was actually + * written (only some dirty data were not) so we just proceed + * as if nothing happened and cleanup the unused block */ + if (error && error != -ENOSPC) { + if (new_bh && new_bh != old_bh) + DQUOT_FREE_BLOCK(inode, 1); + goto cleanup; + } + } else + mark_inode_dirty(inode); + + error = 0; + if (old_bh && old_bh != new_bh) { + struct mb_cache_entry *ce; + + /* + * If there was an old block and we are no longer using it, + * release the old block. + */ + ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev, + old_bh->b_blocknr); + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + /* Free the old block. */ + if (ce) + mb_cache_entry_free(ce); + ea_bdebug(old_bh, "freeing"); + ext2_free_blocks(inode, old_bh->b_blocknr, 1); + /* We let our caller release old_bh, so we + * need to duplicate the buffer before. */ + get_bh(old_bh); + bforget(old_bh); + } else { + /* Decrement the refcount only. */ + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); + if (ce) + mb_cache_entry_release(ce); + DQUOT_FREE_BLOCK(inode, 1); + mark_buffer_dirty(old_bh); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); + } + unlock_buffer(old_bh); + } + +cleanup: + brelse(new_bh); + + return error; +} + +/* + * ext2_xattr_delete_inode() + * + * Free extended attribute resources associated with this inode. This + * is called immediately before an inode is freed. + */ +void +ext2_xattr_delete_inode(struct inode *inode) +{ + struct buffer_head *bh = NULL; + struct mb_cache_entry *ce; + + down_write(&EXT2_I(inode)->xattr_sem); + if (!EXT2_I(inode)->i_file_acl) + goto cleanup; + bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + if (!bh) { + ext2_error(inode->i_sb, "ext2_xattr_delete_inode", + "inode %ld: block %d read error", inode->i_ino, + EXT2_I(inode)->i_file_acl); + goto cleanup; + } + ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); + if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + HDR(bh)->h_blocks != cpu_to_le32(1)) { + ext2_error(inode->i_sb, "ext2_xattr_delete_inode", + "inode %ld: bad block %d", inode->i_ino, + EXT2_I(inode)->i_file_acl); + goto cleanup; + } + ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr); + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + if (ce) + mb_cache_entry_free(ce); + ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); + get_bh(bh); + bforget(bh); + } else { + HDR(bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(bh)->h_refcount) - 1); + if (ce) + mb_cache_entry_release(ce); + mark_buffer_dirty(bh); + if (IS_SYNC(inode)) + sync_dirty_buffer(bh); + DQUOT_FREE_BLOCK(inode, 1); + } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); + EXT2_I(inode)->i_file_acl = 0; + +cleanup: + brelse(bh); + up_write(&EXT2_I(inode)->xattr_sem); +} + +/* + * ext2_xattr_put_super() + * + * This is called when a file system is unmounted. + */ +void +ext2_xattr_put_super(struct super_block *sb) +{ + mb_cache_shrink(ext2_xattr_cache, sb->s_bdev); +} + + +/* + * ext2_xattr_cache_insert() + * + * Create a new entry in the extended attribute cache, and insert + * it unless such an entry is already in the cache. + * + * Returns 0, or a negative error number on failure. + */ +static int +ext2_xattr_cache_insert(struct buffer_head *bh) +{ + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + struct mb_cache_entry *ce; + int error; + + ce = mb_cache_entry_alloc(ext2_xattr_cache); + if (!ce) + return -ENOMEM; + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + if (error) { + mb_cache_entry_free(ce); + if (error == -EBUSY) { + ea_bdebug(bh, "already in cache (%d cache entries)", + atomic_read(&ext2_xattr_cache->c_entry_count)); + error = 0; + } + } else { + ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, + atomic_read(&ext2_xattr_cache->c_entry_count)); + mb_cache_entry_release(ce); + } + return error; +} + +/* + * ext2_xattr_cmp() + * + * Compare two extended attribute blocks for equality. + * + * Returns 0 if the blocks are equal, 1 if they differ, and + * a negative error number on errors. + */ +static int +ext2_xattr_cmp(struct ext2_xattr_header *header1, + struct ext2_xattr_header *header2) +{ + struct ext2_xattr_entry *entry1, *entry2; + + entry1 = ENTRY(header1+1); + entry2 = ENTRY(header2+1); + while (!IS_LAST_ENTRY(entry1)) { + if (IS_LAST_ENTRY(entry2)) + return 1; + if (entry1->e_hash != entry2->e_hash || + entry1->e_name_index != entry2->e_name_index || + entry1->e_name_len != entry2->e_name_len || + entry1->e_value_size != entry2->e_value_size || + memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) + return 1; + if (entry1->e_value_block != 0 || entry2->e_value_block != 0) + return -EIO; + if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), + (char *)header2 + le16_to_cpu(entry2->e_value_offs), + le32_to_cpu(entry1->e_value_size))) + return 1; + + entry1 = EXT2_XATTR_NEXT(entry1); + entry2 = EXT2_XATTR_NEXT(entry2); + } + if (!IS_LAST_ENTRY(entry2)) + return 1; + return 0; +} + +/* + * ext2_xattr_cache_find() + * + * Find an identical extended attribute block. + * + * Returns a locked buffer head to the block found, or NULL if such + * a block was not found or an error occurred. + */ +static struct buffer_head * +ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) +{ + __u32 hash = le32_to_cpu(header->h_hash); + struct mb_cache_entry *ce; + + if (!header->h_hash) + return NULL; /* never share */ + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); +again: + ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, + inode->i_sb->s_bdev, hash); + while (ce) { + struct buffer_head *bh; + + if (IS_ERR(ce)) { + if (PTR_ERR(ce) == -EAGAIN) + goto again; + break; + } + + bh = sb_bread(inode->i_sb, ce->e_block); + if (!bh) { + ext2_error(inode->i_sb, "ext2_xattr_cache_find", + "inode %ld: block %ld read error", + inode->i_ino, (unsigned long) ce->e_block); + } else { + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT2_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT2_XATTR_REFCOUNT_MAX); + } else if (!ext2_xattr_cmp(header, HDR(bh))) { + ea_bdebug(bh, "b_count=%d", + atomic_read(&(bh->b_count))); + mb_cache_entry_release(ce); + return bh; + } + unlock_buffer(bh); + brelse(bh); + } + ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + } + return NULL; +} + +#define NAME_HASH_SHIFT 5 +#define VALUE_HASH_SHIFT 16 + +/* + * ext2_xattr_hash_entry() + * + * Compute the hash of an extended attribute. + */ +static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, + struct ext2_xattr_entry *entry) +{ + __u32 hash = 0; + char *name = entry->e_name; + int n; + + for (n=0; n < entry->e_name_len; n++) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ + *name++; + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { + __le32 *value = (__le32 *)((char *)header + + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + + EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + } + entry->e_hash = cpu_to_le32(hash); +} + +#undef NAME_HASH_SHIFT +#undef VALUE_HASH_SHIFT + +#define BLOCK_HASH_SHIFT 16 + +/* + * ext2_xattr_rehash() + * + * Re-compute the extended attribute hash value after an entry has changed. + */ +static void ext2_xattr_rehash(struct ext2_xattr_header *header, + struct ext2_xattr_entry *entry) +{ + struct ext2_xattr_entry *here; + __u32 hash = 0; + + ext2_xattr_hash_entry(header, entry); + here = ENTRY(header+1); + while (!IS_LAST_ENTRY(here)) { + if (!here->e_hash) { + /* Block is not shared if an entry's hash value == 0 */ + hash = 0; + break; + } + hash = (hash << BLOCK_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ + le32_to_cpu(here->e_hash); + here = EXT2_XATTR_NEXT(here); + } + header->h_hash = cpu_to_le32(hash); +} + +#undef BLOCK_HASH_SHIFT + +int __init +init_ext2_xattr(void) +{ + ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, + sizeof(struct mb_cache_entry) + + sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + if (!ext2_xattr_cache) + return -ENOMEM; + return 0; +} + +void +exit_ext2_xattr(void) +{ + mb_cache_destroy(ext2_xattr_cache); +} diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h new file mode 100644 index 000000000000..5f3bfde3b810 --- /dev/null +++ b/fs/ext2/xattr.h @@ -0,0 +1,118 @@ +/* + File: linux/ext2_xattr.h + + On-disk format of extended attributes for the ext2 filesystem. + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/xattr.h> + +/* Magic value in attribute blocks */ +#define EXT2_XATTR_MAGIC 0xEA020000 + +/* Maximum number of references to one attribute block */ +#define EXT2_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define EXT2_XATTR_INDEX_USER 1 +#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EXT2_XATTR_INDEX_TRUSTED 4 +#define EXT2_XATTR_INDEX_LUSTRE 5 +#define EXT2_XATTR_INDEX_SECURITY 6 + +struct ext2_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __le32 h_blocks; /* number of disk blocks used */ + __le32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct ext2_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_offs; /* offset in disk block of value */ + __le32 e_value_block; /* disk block attribute is stored on (n/i) */ + __le32 e_value_size; /* size of attribute value */ + __le32 e_hash; /* hash value of name and value */ + char e_name[0]; /* attribute name */ +}; + +#define EXT2_XATTR_PAD_BITS 2 +#define EXT2_XATTR_PAD (1<<EXT2_XATTR_PAD_BITS) +#define EXT2_XATTR_ROUND (EXT2_XATTR_PAD-1) +#define EXT2_XATTR_LEN(name_len) \ + (((name_len) + EXT2_XATTR_ROUND + \ + sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND) +#define EXT2_XATTR_NEXT(entry) \ + ( (struct ext2_xattr_entry *)( \ + (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) ) +#define EXT2_XATTR_SIZE(size) \ + (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) + +# ifdef CONFIG_EXT2_FS_XATTR + +extern struct xattr_handler ext2_xattr_user_handler; +extern struct xattr_handler ext2_xattr_trusted_handler; +extern struct xattr_handler ext2_xattr_acl_access_handler; +extern struct xattr_handler ext2_xattr_acl_default_handler; +extern struct xattr_handler ext2_xattr_security_handler; + +extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); + +extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); +extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); + +extern void ext2_xattr_delete_inode(struct inode *); +extern void ext2_xattr_put_super(struct super_block *); + +extern int init_ext2_xattr(void); +extern void exit_ext2_xattr(void); + +extern struct xattr_handler *ext2_xattr_handlers[]; + +# else /* CONFIG_EXT2_FS_XATTR */ + +static inline int +ext2_xattr_get(struct inode *inode, int name_index, + const char *name, void *buffer, size_t size) +{ + return -EOPNOTSUPP; +} + +static inline int +ext2_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline void +ext2_xattr_delete_inode(struct inode *inode) +{ +} + +static inline void +ext2_xattr_put_super(struct super_block *sb) +{ +} + +static inline int +init_ext2_xattr(void) +{ + return 0; +} + +static inline void +exit_ext2_xattr(void) +{ +} + +#define ext2_xattr_handlers NULL + +# endif /* CONFIG_EXT2_FS_XATTR */ + diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c new file mode 100644 index 000000000000..6a6c59fbe599 --- /dev/null +++ b/fs/ext2/xattr_security.c @@ -0,0 +1,53 @@ +/* + * linux/fs/ext2/xattr_security.c + * Handler for storing security labels as extended attributes. + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext2_fs.h> +#include "xattr.h" + +static size_t +ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext2_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, + buffer, size); +} + +static int +ext2_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, + value, size, flags); +} + +struct xattr_handler ext2_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = ext2_xattr_security_list, + .get = ext2_xattr_security_get, + .set = ext2_xattr_security_set, +}; diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c new file mode 100644 index 000000000000..52b30ee6a25f --- /dev/null +++ b/fs/ext2/xattr_trusted.c @@ -0,0 +1,64 @@ +/* + * linux/fs/ext2/xattr_trusted.c + * Handler for trusted extended attributes. + * + * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext2_fs.h> +#include "xattr.h" + +#define XATTR_TRUSTED_PREFIX "trusted." + +static size_t +ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!capable(CAP_SYS_ADMIN)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext2_xattr_trusted_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, + buffer, size); +} + +static int +ext2_xattr_trusted_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, + value, size, flags); +} + +struct xattr_handler ext2_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = ext2_xattr_trusted_list, + .get = ext2_xattr_trusted_get, + .set = ext2_xattr_trusted_set, +}; diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c new file mode 100644 index 000000000000..0c03ea131a94 --- /dev/null +++ b/fs/ext2/xattr_user.c @@ -0,0 +1,77 @@ +/* + * linux/fs/ext2/xattr_user.c + * Handler for extended user attributes. + * + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include "ext2.h" +#include "xattr.h" + +#define XATTR_USER_PREFIX "user." + +static size_t +ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!test_opt(inode->i_sb, XATTR_USER)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_USER_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext2_xattr_user_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + error = permission(inode, MAY_READ, NULL); + if (error) + return error; + + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); +} + +static int +ext2_xattr_user_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + if ( !S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + error = permission(inode, MAY_WRITE, NULL); + if (error) + return error; + + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, + value, size, flags); +} + +struct xattr_handler ext2_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = ext2_xattr_user_list, + .get = ext2_xattr_user_get, + .set = ext2_xattr_user_set, +}; |